Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c    53
-rw-r--r--   mm/memory.c    153
-rw-r--r--   mm/shmem.c      11
-rw-r--r--   mm/truncate.c   13
4 files changed, 114 insertions, 116 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 5d5449f3d41c..462cda58a18e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1325,9 +1325,10 @@ struct page *filemap_nopage(struct vm_area_struct *area,
 	unsigned long size, pgoff;
 	int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
+	BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE));
+
 	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
-retry_all:
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (pgoff >= size)
 		goto outside_data_content;
@@ -1349,7 +1350,7 @@ retry_all:
 	 * Do we have something in the page cache already?
 	 */
 retry_find:
-	page = find_get_page(mapping, pgoff);
+	page = find_lock_page(mapping, pgoff);
 	if (!page) {
 		unsigned long ra_pages;
 
@@ -1383,7 +1384,7 @@ retry_find:
 				start = pgoff - ra_pages / 2;
 			do_page_cache_readahead(mapping, file, start, ra_pages);
 		}
-		page = find_get_page(mapping, pgoff);
+		page = find_lock_page(mapping, pgoff);
 		if (!page)
 			goto no_cached_page;
 	}
@@ -1392,13 +1393,19 @@ retry_find:
 		ra->mmap_hit++;
 
 	/*
-	 * Ok, found a page in the page cache, now we need to check
-	 * that it's up-to-date.
+	 * We have a locked page in the page cache, now we need to check
+	 * that it's up-to-date. If not, it is going to be due to an error.
 	 */
-	if (!PageUptodate(page))
+	if (unlikely(!PageUptodate(page)))
 		goto page_not_uptodate;
 
-success:
+	/* Must recheck i_size under page lock */
+	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (unlikely(pgoff >= size)) {
+		unlock_page(page);
+		goto outside_data_content;
+	}
+
 	/*
 	 * Found the page and have a reference on it.
 	 */
@@ -1440,6 +1447,7 @@ no_cached_page:
 	return NOPAGE_SIGBUS;
 
 page_not_uptodate:
+	/* IO error path */
 	if (!did_readaround) {
 		majmin = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
@@ -1451,37 +1459,15 @@ page_not_uptodate:
 	 * because there really aren't any performance issues here
 	 * and we need to check for errors.
 	 */
-	lock_page(page);
-
-	/* Somebody truncated the page on us? */
-	if (!page->mapping) {
-		unlock_page(page);
-		page_cache_release(page);
-		goto retry_all;
-	}
-
-	/* Somebody else successfully read it in? */
-	if (PageUptodate(page)) {
-		unlock_page(page);
-		goto success;
-	}
 	ClearPageError(page);
 	error = mapping->a_ops->readpage(file, page);
-	if (!error) {
-		wait_on_page_locked(page);
-		if (PageUptodate(page))
-			goto success;
-	} else if (error == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
+	page_cache_release(page);
+
+	if (!error || error == AOP_TRUNCATED_PAGE)
 		goto retry_find;
-	}
 
-	/*
-	 * Things didn't work out. Return zero to tell the
-	 * mm layer so, possibly freeing the page cache page first.
-	 */
+	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
-	page_cache_release(page);
 	return NOPAGE_SIGBUS;
 }
 EXPORT_SYMBOL(filemap_nopage);
@@ -1674,6 +1660,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
 		return -ENOEXEC;
 	file_accessed(file);
 	vma->vm_ops = &generic_file_vm_ops;
+	vma->vm_flags |= VM_CAN_INVALIDATE;
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 9c6ff7fffdc8..e6c99f6b5649 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1831,6 +1831,13 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
 	unsigned long restart_addr;
 	int need_break;
 
+	/*
+	 * files that support invalidating or truncating portions of the
+	 * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and
+	 * have their .nopage function return the page locked.
+	 */
+	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+
 again:
 	restart_addr = vma->vm_truncate_count;
 	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
@@ -1959,17 +1966,8 @@ void unmap_mapping_range(struct address_space *mapping,
 
 	spin_lock(&mapping->i_mmap_lock);
 
-	/* serialize i_size write against truncate_count write */
-	smp_wmb();
-	/* Protect against page faults, and endless unmapping loops */
+	/* Protect against endless unmapping loops */
 	mapping->truncate_count++;
-	/*
-	 * For archs where spin_lock has inclusive semantics like ia64
-	 * this smp_mb() will prevent to read pagetable contents
-	 * before the truncate_count increment is visible to
-	 * other cpus.
-	 */
-	smp_mb();
 	if (unlikely(is_restart_addr(mapping->truncate_count))) {
 		if (mapping->truncate_count == 0)
 			reset_vma_truncate_counts(mapping);
@@ -2008,8 +2006,18 @@ int vmtruncate(struct inode * inode, loff_t offset)
 	if (IS_SWAPFILE(inode))
 		goto out_busy;
 	i_size_write(inode, offset);
+
+	/*
+	 * unmap_mapping_range is called twice, first simply for efficiency
+	 * so that truncate_inode_pages does fewer single-page unmaps. However
+	 * after this first call, and before truncate_inode_pages finishes,
+	 * it is possible for private pages to be COWed, which remain after
+	 * truncate_inode_pages finishes, hence the second unmap_mapping_range
+	 * call must be made for correctness.
+	 */
 	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	truncate_inode_pages(mapping, offset);
+	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	goto out_truncate;
 
 do_expand:
@@ -2049,6 +2057,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 	down_write(&inode->i_alloc_sem);
 	unmap_mapping_range(mapping, offset, (end - offset), 1);
 	truncate_inode_pages_range(mapping, offset, end);
+	unmap_mapping_range(mapping, offset, (end - offset), 1);
 	inode->i_op->truncate_range(inode, offset, end);
 	up_write(&inode->i_alloc_sem);
 	mutex_unlock(&inode->i_mutex);
@@ -2206,7 +2215,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	lazy_mmu_prot_update(pte);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
@@ -2297,10 +2305,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		int write_access)
 {
 	spinlock_t *ptl;
-	struct page *new_page;
-	struct address_space *mapping = NULL;
+	struct page *page, *nopage_page;
 	pte_t entry;
-	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
 	struct page *dirty_page = NULL;
@@ -2308,74 +2314,53 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
 
-	if (vma->vm_file) {
-		mapping = vma->vm_file->f_mapping;
-		sequence = mapping->truncate_count;
-		smp_rmb(); /* serializes i_size against truncate_count */
-	}
-retry:
-	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
-	/*
-	 * No smp_rmb is needed here as long as there's a full
-	 * spin_lock/unlock sequence inside the ->nopage callback
-	 * (for the pagecache lookup) that acts as an implicit
-	 * smp_mb() and prevents the i_size read to happen
-	 * after the next truncate_count read.
-	 */
-
+	nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 	/* no page was available -- either SIGBUS, OOM or REFAULT */
-	if (unlikely(new_page == NOPAGE_SIGBUS))
+	if (unlikely(nopage_page == NOPAGE_SIGBUS))
 		return VM_FAULT_SIGBUS;
-	else if (unlikely(new_page == NOPAGE_OOM))
+	else if (unlikely(nopage_page == NOPAGE_OOM))
 		return VM_FAULT_OOM;
-	else if (unlikely(new_page == NOPAGE_REFAULT))
+	else if (unlikely(nopage_page == NOPAGE_REFAULT))
 		return VM_FAULT_MINOR;
 
+	BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page));
+	/*
+	 * For consistency in subsequent calls, make the nopage_page always
+	 * locked.
+	 */
+	if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
+		lock_page(nopage_page);
+
 	/*
 	 * Should we do an early C-O-W break?
 	 */
+	page = nopage_page;
 	if (write_access) {
 		if (!(vma->vm_flags & VM_SHARED)) {
-			struct page *page;
-
-			if (unlikely(anon_vma_prepare(vma)))
-				goto oom;
-			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-						vma, address);
-			if (!page)
-				goto oom;
-			copy_user_highpage(page, new_page, address, vma);
-			page_cache_release(new_page);
-			new_page = page;
+			if (unlikely(anon_vma_prepare(vma))) {
+				ret = VM_FAULT_OOM;
+				goto out_error;
+			}
+			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+			if (!page) {
+				ret = VM_FAULT_OOM;
+				goto out_error;
+			}
+			copy_user_highpage(page, nopage_page, address, vma);
 			anon = 1;
-
 		} else {
 			/* if the page will be shareable, see if the backing
 			 * address space wants to know that the page is about
 			 * to become writable */
 			if (vma->vm_ops->page_mkwrite &&
-			    vma->vm_ops->page_mkwrite(vma, new_page) < 0
-			    ) {
-				page_cache_release(new_page);
-				return VM_FAULT_SIGBUS;
+			    vma->vm_ops->page_mkwrite(vma, page) < 0) {
+				ret = VM_FAULT_SIGBUS;
+				goto out_error;
 			}
 		}
 	}
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	/*
-	 * For a file-backed vma, someone could have truncated or otherwise
-	 * invalidated this page.  If unmap_mapping_range got called,
-	 * retry getting the page.
-	 */
-	if (mapping && unlikely(sequence != mapping->truncate_count)) {
-		pte_unmap_unlock(page_table, ptl);
-		page_cache_release(new_page);
-		cond_resched();
-		sequence = mapping->truncate_count;
-		smp_rmb();
-		goto retry;
-	}
 
 	/*
 	 * This silly early PAGE_DIRTY setting removes a race
@@ -2388,43 +2373,51 @@ retry:
 	 * handle that later.
 	 */
 	/* Only go through if we didn't race with anybody else... */
-	if (pte_none(*page_table)) {
-		flush_icache_page(vma, new_page);
-		entry = mk_pte(new_page, vma->vm_page_prot);
+	if (likely(pte_none(*page_table))) {
+		flush_icache_page(vma, page);
+		entry = mk_pte(page, vma->vm_page_prot);
 		if (write_access)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
-			inc_mm_counter(mm, anon_rss);
-			lru_cache_add_active(new_page);
-			page_add_new_anon_rmap(new_page, vma, address);
+                        inc_mm_counter(mm, anon_rss);
+                        lru_cache_add_active(page);
+                        page_add_new_anon_rmap(page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
-			page_add_file_rmap(new_page);
+			page_add_file_rmap(page);
 			if (write_access) {
-				dirty_page = new_page;
+				dirty_page = page;
 				get_page(dirty_page);
 			}
 		}
+
+		/* no need to invalidate: a not-present page won't be cached */
+		update_mmu_cache(vma, address, entry);
+		lazy_mmu_prot_update(entry);
 	} else {
-		/* One of our sibling threads was faster, back out. */
-		page_cache_release(new_page);
-		goto unlock;
+		if (anon)
+			page_cache_release(page);
+		else
+			anon = 1; /* not anon, but release nopage_page */
 	}
 
-	/* no need to invalidate: a not-present page shouldn't be cached */
-	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
-unlock:
 	pte_unmap_unlock(page_table, ptl);
-	if (dirty_page) {
+
+out:
+	unlock_page(nopage_page);
+	if (anon)
+		page_cache_release(nopage_page);
+	else if (dirty_page) {
 		set_page_dirty_balance(dirty_page);
 		put_page(dirty_page);
 	}
+
 	return ret;
-oom:
-	page_cache_release(new_page);
-	return VM_FAULT_OOM;
+
+out_error:
+	anon = 1; /* relase nopage_page */
+	goto out;
 }
 
 /*
diff --git a/mm/shmem.c b/mm/shmem.c
index 96fa79fb6ad3..5808fadd3944 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -83,6 +83,7 @@ enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
 	SGP_WRITE,	/* may exceed i_size, may allocate page */
+	SGP_NOPAGE,	/* same as SGP_CACHE, return with page locked */
 };
 
 static int shmem_getpage(struct inode *inode, unsigned long idx,
@@ -1289,8 +1290,10 @@ repeat:
 	}
 done:
 	if (*pagep != filepage) {
-		unlock_page(filepage);
 		*pagep = filepage;
+		if (sgp != SGP_NOPAGE)
+			unlock_page(filepage);
+
 	}
 	return 0;
 
@@ -1310,13 +1313,15 @@ static struct page *shmem_nopage(struct vm_area_struct *vma,
 	unsigned long idx;
 	int error;
 
+	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+
 	idx = (address - vma->vm_start) >> PAGE_SHIFT;
 	idx += vma->vm_pgoff;
 	idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
 	if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return NOPAGE_SIGBUS;
 
-	error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
+	error = shmem_getpage(inode, idx, &page, SGP_NOPAGE, type);
 	if (error)
 		return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
 
@@ -1414,6 +1419,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	file_accessed(file);
 	vma->vm_ops = &shmem_vm_ops;
+	vma->vm_flags |= VM_CAN_INVALIDATE;
 	return 0;
 }
 
@@ -2596,5 +2602,6 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 		fput(vma->vm_file);
 	vma->vm_file = file;
 	vma->vm_ops = &shmem_vm_ops;
+	vma->vm_flags |= VM_CAN_INVALIDATE;
 	return 0;
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index f47e46d1be3b..aed85f0b707f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -192,6 +192,11 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				unlock_page(page);
 				continue;
 			}
+			if (page_mapped(page)) {
+				unmap_mapping_range(mapping,
+				  (loff_t)page_index<<PAGE_CACHE_SHIFT,
+				  PAGE_CACHE_SIZE, 0);
+			}
 			truncate_complete_page(mapping, page);
 			unlock_page(page);
 		}
@@ -229,6 +234,11 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				break;
 			lock_page(page);
 			wait_on_page_writeback(page);
+			if (page_mapped(page)) {
+				unmap_mapping_range(mapping,
+				  (loff_t)page->index<<PAGE_CACHE_SHIFT,
+				  PAGE_CACHE_SIZE, 0);
+			}
 			if (page->index > next)
 				next = page->index;
 			next++;
@@ -405,7 +415,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				break;
 			}
 			wait_on_page_writeback(page);
-			while (page_mapped(page)) {
+			if (page_mapped(page)) {
 				if (!did_range_unmap) {
 					/*
 					 * Zap the rest of the file in one hit.
@@ -425,6 +435,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					  PAGE_CACHE_SIZE, 0);
 				}
 			}
+			BUG_ON(page_mapped(page));
 			ret = do_launder_page(mapping, page);
 			if (ret == 0 && !invalidate_complete_page2(mapping, page))
 				ret = -EIO;
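
Taken together, the hunks above define a contract for mappings whose pagecache can be truncated or invalidated underneath them: the ->mmap handler sets VM_CAN_INVALIDATE, the ->nopage handler returns the pagecache page locked and rechecks i_size only while holding the page lock, and do_no_page drops the lock once the pte is installed. The following is a minimal, hypothetical sketch of a filesystem's mmap path under that contract; the examplefs_* names are invented for illustration, and readpage retry, readahead, and major-fault accounting are omitted.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

static struct page *examplefs_nopage(struct vm_area_struct *area,
				     unsigned long address, int *type)
{
	struct address_space *mapping = area->vm_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned long pgoff, size;
	struct page *page;

	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;

	/* return the page locked, as do_no_page expects for VM_CAN_INVALIDATE vmas */
	page = find_lock_page(mapping, pgoff);
	if (!page)
		return NOPAGE_SIGBUS;

	/* i_size and the uptodate state are only stable under the page lock */
	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (!PageUptodate(page) || pgoff >= size) {
		unlock_page(page);
		page_cache_release(page);
		return NOPAGE_SIGBUS;
	}

	if (type)
		*type = VM_FAULT_MINOR;
	return page;	/* still locked; do_no_page unlocks it at its out: label */
}

static struct vm_operations_struct examplefs_vm_ops = {
	.nopage	= examplefs_nopage,
};

static int examplefs_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &examplefs_vm_ops;
	vma->vm_flags |= VM_CAN_INVALIDATE;	/* opt in to the locked-page protocol */
	return 0;
}

A mapping that does not set VM_CAN_INVALIDATE keeps working, since do_no_page now takes the page lock itself in that case, but unmap_mapping_range_vma will BUG if such a mapping is ever truncated or invalidated from under its mmaps.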
