Diffstat (limited to 'fs/dax.c')
-rw-r--r--	fs/dax.c	120
1 file changed, 29 insertions, 91 deletions
@@ -24,6 +24,7 @@
 #include <linux/sizes.h>
 #include <linux/mmu_notifier.h>
 #include <linux/iomap.h>
+#include <linux/rmap.h>
 #include <asm/pgalloc.h>

 #define CREATE_TRACE_POINTS
@@ -721,7 +722,8 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter
 	int id;

 	id = dax_read_lock();
-	rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
+	rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, DAX_ACCESS,
+				&kaddr, NULL);
 	if (rc < 0) {
 		dax_read_unlock(id);
 		return rc;
@@ -789,95 +791,12 @@ static void *dax_insert_entry(struct xa_state *xas,
 	return entry;
 }

-static inline
-unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
-{
-	unsigned long address;
-
-	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
-	return address;
-}
-
-/* Walk all mappings of a given index of a file and writeprotect them */
-static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
-		unsigned long pfn)
-{
-	struct vm_area_struct *vma;
-	pte_t pte, *ptep = NULL;
-	pmd_t *pmdp = NULL;
-	spinlock_t *ptl;
-
-	i_mmap_lock_read(mapping);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
-		struct mmu_notifier_range range;
-		unsigned long address;
-
-		cond_resched();
-
-		if (!(vma->vm_flags & VM_SHARED))
-			continue;
-
-		address = pgoff_address(index, vma);
-
-		/*
-		 * follow_invalidate_pte() will use the range to call
-		 * mmu_notifier_invalidate_range_start() on our behalf before
-		 * taking any lock.
-		 */
-		if (follow_invalidate_pte(vma->vm_mm, address, &range, &ptep,
-					  &pmdp, &ptl))
-			continue;
-
-		/*
-		 * No need to call mmu_notifier_invalidate_range() as we are
-		 * downgrading page table protection not changing it to point
-		 * to a new page.
-		 *
-		 * See Documentation/vm/mmu_notifier.rst
-		 */
-		if (pmdp) {
-#ifdef CONFIG_FS_DAX_PMD
-			pmd_t pmd;
-
-			if (pfn != pmd_pfn(*pmdp))
-				goto unlock_pmd;
-			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
-				goto unlock_pmd;
-
-			flush_cache_page(vma, address, pfn);
-			pmd = pmdp_invalidate(vma, address, pmdp);
-			pmd = pmd_wrprotect(pmd);
-			pmd = pmd_mkclean(pmd);
-			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
-unlock_pmd:
-#endif
-			spin_unlock(ptl);
-		} else {
-			if (pfn != pte_pfn(*ptep))
-				goto unlock_pte;
-			if (!pte_dirty(*ptep) && !pte_write(*ptep))
-				goto unlock_pte;
-
-			flush_cache_page(vma, address, pfn);
-			pte = ptep_clear_flush(vma, address, ptep);
-			pte = pte_wrprotect(pte);
-			pte = pte_mkclean(pte);
-			set_pte_at(vma->vm_mm, address, ptep, pte);
-unlock_pte:
-			pte_unmap_unlock(ptep, ptl);
-		}
-
-		mmu_notifier_invalidate_range_end(&range);
-	}
-	i_mmap_unlock_read(mapping);
-}
-
 static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 		struct address_space *mapping, void *entry)
 {
-	unsigned long pfn, index, count;
+	unsigned long pfn, index, count, end;
 	long ret = 0;
+	struct vm_area_struct *vma;

 	/*
 	 * A page got tagged dirty in DAX mapping? Something is seriously
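Note on the API change above: dax_direct_access() now takes an explicit access mode as its fourth argument, so every caller gains a DAX_ACCESS (or, later in this patch, DAX_RECOVERY_WRITE) argument. A minimal sketch of the new calling convention, using only the signature and lock helpers visible in this diff (the wrapper name is hypothetical):

#include <linux/dax.h>

/* Hypothetical helper: map one page of a DAX device for normal access. */
static long dax_map_one_page(struct dax_device *dax_dev, pgoff_t pgoff,
			     void **kaddr)
{
	long rc;
	int id;

	id = dax_read_lock();
	/* DAX_ACCESS requests an ordinary mapping; per this series it is
	 * expected to fail (e.g. -EIO) if the range contains poison. */
	rc = dax_direct_access(dax_dev, pgoff, 1, DAX_ACCESS, kaddr, NULL);
	dax_read_unlock(id);
	return rc;
}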
@@ -935,8 +854,16 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	pfn = dax_to_pfn(entry);
 	count = 1UL << dax_entry_order(entry);
 	index = xas->xa_index & ~(count - 1);
+	end = index + count - 1;
+
+	/* Walk all mappings of a given index of a file and writeprotect them */
+	i_mmap_lock_read(mapping);
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, end) {
+		pfn_mkclean_range(pfn, count, index, vma);
+		cond_resched();
+	}
+	i_mmap_unlock_read(mapping);

-	dax_entry_mkclean(mapping, index, pfn);
 	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
 	/*
 	 * After we have flushed the cache, we can clear the dirty tag. There
@@ -1013,7 +940,7 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,

 	id = dax_read_lock();
 	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-				   NULL, pfnp);
+				   DAX_ACCESS, NULL, pfnp);
 	if (length < 0) {
 		rc = length;
 		goto out;
@@ -1122,7 +1049,7 @@ static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
 	void *kaddr;
 	long ret;

-	ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+	ret = dax_direct_access(dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, NULL);
 	if (ret > 0) {
 		memset(kaddr + offset, 0, size);
 		dax_flush(dax_dev, kaddr + offset, size);
@@ -1239,6 +1166,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		const size_t size = ALIGN(length + offset, PAGE_SIZE);
 		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
 		ssize_t map_len;
+		bool recovery = false;
 		void *kaddr;

 		if (fatal_signal_pending(current)) {
@@ -1247,7 +1175,14 @@
 		}

 		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
-				&kaddr, NULL);
+				DAX_ACCESS, &kaddr, NULL);
+		if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
+			map_len = dax_direct_access(dax_dev, pgoff,
+					PHYS_PFN(size), DAX_RECOVERY_WRITE,
+					&kaddr, NULL);
+			if (map_len > 0)
+				recovery = true;
+		}
 		if (map_len < 0) {
 			ret = map_len;
 			break;
@@ -1259,7 +1194,10 @@
 		if (map_len > end - pos)
 			map_len = end - pos;

-		if (iov_iter_rw(iter) == WRITE)
+		if (recovery)
+			xfer = dax_recovery_write(dax_dev, pgoff, kaddr,
+					map_len, iter);
+		else if (iov_iter_rw(iter) == WRITE)
 			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
 					map_len, iter);
 		else
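The dax_iomap_iter() hunks above add a poison-recovery path for writes: when the initial DAX_ACCESS mapping fails with -EIO, the range is remapped in DAX_RECOVERY_WRITE mode and the data is routed through dax_recovery_write() instead of dax_copy_from_iter(), giving the driver a chance to clear the poison while servicing the write. A condensed sketch of that fallback, assuming only the calls visible in this diff (the helper name and the bounds handling are illustrative):

#include <linux/dax.h>
#include <linux/pfn.h>
#include <linux/uio.h>

/* Hypothetical condensation of the write path added above. */
static ssize_t dax_write_or_recover(struct dax_device *dax_dev, pgoff_t pgoff,
				    size_t size, struct iov_iter *iter)
{
	bool recovery = false;
	void *kaddr;
	long map_len;

	map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
				    DAX_ACCESS, &kaddr, NULL);
	if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
		/* Poisoned range: retry in recovery mode so the driver can
		 * clear the poison while servicing the write. */
		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
					    DAX_RECOVERY_WRITE, &kaddr, NULL);
		if (map_len > 0)
			recovery = true;
	}
	if (map_len < 0)
		return map_len;

	/* dax_direct_access() returns a page count; cap the copy length. */
	size = min_t(size_t, size, PFN_PHYS(map_len));
	if (recovery)
		return dax_recovery_write(dax_dev, pgoff, kaddr, size, iter);
	return dax_copy_from_iter(dax_dev, pgoff, kaddr, size, iter);
}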
