Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c  425
1 file changed, 247 insertions, 178 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index a8c69c8c0a90..daef091d4c50 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -95,8 +95,8 @@ * ->swap_lock (try_to_unmap_one) * ->private_lock (try_to_unmap_one) * ->tree_lock (try_to_unmap_one) - * ->zone.lru_lock (follow_page->mark_page_accessed) - * ->zone.lru_lock (check_pte_range->isolate_lru_page) + * ->zone_lru_lock(zone) (follow_page->mark_page_accessed) + * ->zone_lru_lock(zone) (check_pte_range->isolate_lru_page) * ->private_lock (page_remove_rmap->set_page_dirty) * ->tree_lock (page_remove_rmap->set_page_dirty) * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) @@ -114,17 +114,14 @@ static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { struct radix_tree_node *node; - unsigned long index; - unsigned int offset; - unsigned int tag; - void **slot; - - VM_BUG_ON(!PageLocked(page)); + int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page); - __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(nr != 1 && shadow, page); if (shadow) { - mapping->nrexceptional++; + mapping->nrexceptional += nr; /* * Make sure the nrexceptional update is committed before * the nrpages update so that final truncate racing @@ -133,43 +130,38 @@ static void page_cache_tree_delete(struct address_space *mapping, */ smp_wmb(); } - mapping->nrpages--; - - if (!node) { - /* Clear direct pointer tags in root node */ - mapping->page_tree.gfp_mask &= __GFP_BITS_MASK; - radix_tree_replace_slot(slot, shadow); - return; - } + mapping->nrpages -= nr; - /* Clear tree tags for the removed page */ - index = page->index; - offset = index & RADIX_TREE_MAP_MASK; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (test_bit(offset, node->tags[tag])) - radix_tree_tag_clear(&mapping->page_tree, index, tag); - } - - /* Delete page, swap shadow entry */ - radix_tree_replace_slot(slot, shadow); - workingset_node_pages_dec(node); - if (shadow) - workingset_node_shadows_inc(node); - else - if (__radix_tree_delete_node(&mapping->page_tree, node)) + for (i = 0; i < nr; i++) { + node = radix_tree_replace_clear_tags(&mapping->page_tree, + page->index + i, shadow); + if (!node) { + VM_BUG_ON_PAGE(nr != 1, page); return; + } - /* - * Track node that only contains shadow entries. - * - * Avoid acquiring the list_lru lock if already tracked. The - * list_empty() test is safe as node->private_list is - * protected by mapping->tree_lock. - */ - if (!workingset_node_pages(node) && - list_empty(&node->private_list)) { - node->private_data = mapping; - list_lru_add(&workingset_shadow_nodes, &node->private_list); + workingset_node_pages_dec(node); + if (shadow) + workingset_node_shadows_inc(node); + else + if (__radix_tree_delete_node(&mapping->page_tree, node)) + continue; + + /* + * Track node that only contains shadow entries. DAX mappings + * contain no shadow entries and may contain other exceptional + * entries so skip those. + * + * Avoid acquiring the list_lru lock if already tracked. + * The list_empty() test is safe as node->private_list is + * protected by mapping->tree_lock. 
+ */ + if (!dax_mapping(mapping) && !workingset_node_pages(node) && + list_empty(&node->private_list)) { + node->private_data = mapping; + list_lru_add(&workingset_shadow_nodes, + &node->private_list); + } } } @@ -181,6 +173,7 @@ static void page_cache_tree_delete(struct address_space *mapping, void __delete_from_page_cache(struct page *page, void *shadow) { struct address_space *mapping = page->mapping; + int nr = hpage_nr_pages(page); trace_mm_filemap_delete_from_page_cache(page); /* @@ -193,6 +186,7 @@ void __delete_from_page_cache(struct page *page, void *shadow) else cleancache_invalidate_page(mapping, page); + VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(page_mapped(page), page); if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { int mapcount; @@ -213,7 +207,7 @@ void __delete_from_page_cache(struct page *page, void *shadow) * some other bad page check should catch it later. */ page_mapcount_reset(page); - atomic_sub(mapcount, &page->_count); + page_ref_sub(page, mapcount); } } @@ -224,9 +218,14 @@ void __delete_from_page_cache(struct page *page, void *shadow) /* hugetlb pages do not participate in page cache accounting. */ if (!PageHuge(page)) - __dec_zone_page_state(page, NR_FILE_PAGES); - if (PageSwapBacked(page)) - __dec_zone_page_state(page, NR_SHMEM); + __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); + if (PageSwapBacked(page)) { + __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr); + if (PageTransHuge(page)) + __dec_node_page_state(page, NR_SHMEM_THPS); + } else { + VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page); + } /* * At this point page must be either written or cleaned by truncate. @@ -250,9 +249,8 @@ void __delete_from_page_cache(struct page *page, void *shadow) */ void delete_from_page_cache(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); unsigned long flags; - void (*freepage)(struct page *); BUG_ON(!PageLocked(page)); @@ -265,11 +263,17 @@ void delete_from_page_cache(struct page *page) if (freepage) freepage(page); - page_cache_release(page); + + if (PageTransHuge(page) && !PageHuge(page)) { + page_ref_sub(page, HPAGE_PMD_NR); + VM_BUG_ON_PAGE(page_count(page) <= 0, page); + } else { + put_page(page); + } } EXPORT_SYMBOL(delete_from_page_cache); -static int filemap_check_errors(struct address_space *mapping) +int filemap_check_errors(struct address_space *mapping) { int ret = 0; /* Check for outstanding write errors */ @@ -281,6 +285,7 @@ static int filemap_check_errors(struct address_space *mapping) ret = -EIO; return ret; } +EXPORT_SYMBOL(filemap_check_errors); /** * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range @@ -352,8 +357,8 @@ EXPORT_SYMBOL(filemap_flush); static int __filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { - pgoff_t index = start_byte >> PAGE_CACHE_SHIFT; - pgoff_t end = end_byte >> PAGE_CACHE_SHIFT; + pgoff_t index = start_byte >> PAGE_SHIFT; + pgoff_t end = end_byte >> PAGE_SHIFT; struct pagevec pvec; int nr_pages; int ret = 0; @@ -550,7 +555,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) pgoff_t offset = old->index; freepage = mapping->a_ops->freepage; - page_cache_get(new); + get_page(new); new->mapping = mapping; new->index = offset; @@ -564,15 +569,15 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) * hugetlb pages do not participate in page cache accounting. 
*/ if (!PageHuge(new)) - __inc_zone_page_state(new, NR_FILE_PAGES); + __inc_node_page_state(new, NR_FILE_PAGES); if (PageSwapBacked(new)) - __inc_zone_page_state(new, NR_SHMEM); + __inc_node_page_state(new, NR_SHMEM); spin_unlock_irqrestore(&mapping->tree_lock, flags); mem_cgroup_migrate(old, new); radix_tree_preload_end(); if (freepage) freepage(old); - page_cache_release(old); + put_page(old); } return error; @@ -597,14 +602,24 @@ static int page_cache_tree_insert(struct address_space *mapping, if (!radix_tree_exceptional_entry(p)) return -EEXIST; - if (WARN_ON(dax_mapping(mapping))) - return -EINVAL; - - if (shadowp) - *shadowp = p; mapping->nrexceptional--; - if (node) - workingset_node_shadows_dec(node); + if (!dax_mapping(mapping)) { + if (shadowp) + *shadowp = p; + if (node) + workingset_node_shadows_dec(node); + } else { + /* DAX can replace empty locked entry with a hole */ + WARN_ON_ONCE(p != + (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | + RADIX_DAX_ENTRY_LOCK)); + /* DAX accounts exceptional entries as normal pages */ + if (node) + workingset_node_pages_dec(node); + /* Wakeup waiters for exceptional entry lock */ + dax_wake_mapping_entry_waiter(mapping, page->index, + false); + } } radix_tree_replace_slot(slot, page); mapping->nrpages++; @@ -651,7 +666,7 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - page_cache_get(page); + get_page(page); page->mapping = mapping; page->index = offset; @@ -663,7 +678,7 @@ static int __add_to_page_cache_locked(struct page *page, /* hugetlb pages do not participate in page cache accounting. */ if (!huge) - __inc_zone_page_state(page, NR_FILE_PAGES); + __inc_node_page_state(page, NR_FILE_PAGES); spin_unlock_irq(&mapping->tree_lock); if (!huge) mem_cgroup_commit_charge(page, memcg, false, false); @@ -675,7 +690,7 @@ err_insert: spin_unlock_irq(&mapping->tree_lock); if (!huge) mem_cgroup_cancel_charge(page, memcg, false); - page_cache_release(page); + put_page(page); return error; } @@ -713,8 +728,12 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, * The page might have been evicted from cache only * recently, in which case it should be activated like * any other repeatedly accessed page. + * The exception is pages getting rewritten; evicting other + * data from the working set, only to cache data that will + * get overwritten with something else, is a waste of memory. 
*/ - if (shadow && workingset_refault(shadow)) { + if (!(gfp_mask & __GFP_WRITE) && + shadow && workingset_refault(shadow)) { SetPageActive(page); workingset_activation(page); } else @@ -868,9 +887,9 @@ EXPORT_SYMBOL(end_page_writeback); * After completing I/O on a page, call this routine to update the page * flags appropriately */ -void page_endio(struct page *page, int rw, int err) +void page_endio(struct page *page, int op, int err) { - if (rw == READ) { + if (!op_is_write(op)) { if (!err) { SetPageUptodate(page); } else { @@ -878,7 +897,7 @@ void page_endio(struct page *page, int rw, int err) SetPageError(page); } unlock_page(page); - } else { /* rw == WRITE */ + } else { if (err) { SetPageError(page); if (page->mapping) @@ -1054,7 +1073,7 @@ EXPORT_SYMBOL(page_cache_prev_hole); struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) { void **pagep; - struct page *page; + struct page *head, *page; rcu_read_lock(); repeat: @@ -1074,8 +1093,16 @@ repeat: */ goto out; } - if (!page_cache_get_speculative(page)) + + head = compound_head(page); + if (!page_cache_get_speculative(head)) + goto repeat; + + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); goto repeat; + } /* * Has the page moved? @@ -1083,7 +1110,7 @@ repeat: * include/linux/pagemap.h for details. */ if (unlikely(page != *pagep)) { - page_cache_release(page); + put_page(head); goto repeat; } } @@ -1119,12 +1146,12 @@ repeat: if (page && !radix_tree_exception(page)) { lock_page(page); /* Has the page been truncated? */ - if (unlikely(page->mapping != mapping)) { + if (unlikely(page_mapping(page) != mapping)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } - VM_BUG_ON_PAGE(page->index != offset, page); + VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); } return page; } @@ -1168,7 +1195,7 @@ repeat: if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { if (!trylock_page(page)) { - page_cache_release(page); + put_page(page); return NULL; } } else { @@ -1178,7 +1205,7 @@ repeat: /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } VM_BUG_ON_PAGE(page->index != offset, page); @@ -1209,7 +1236,7 @@ no_page: err = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_RECLAIM_MASK); if (unlikely(err)) { - page_cache_release(page); + put_page(page); page = NULL; if (err == -EEXIST) goto repeat; @@ -1256,7 +1283,7 @@ unsigned find_get_entries(struct address_space *mapping, rcu_read_lock(); radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { - struct page *page; + struct page *head, *page; repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) @@ -1273,12 +1300,20 @@ repeat: */ goto export; } - if (!page_cache_get_speculative(page)) + + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto repeat; + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); + goto repeat; + } + /* Has the page moved? 
*/ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(head); goto repeat; } export: @@ -1319,7 +1354,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, rcu_read_lock(); radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { - struct page *page; + struct page *head, *page; repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) @@ -1338,12 +1373,19 @@ repeat: continue; } - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) + goto repeat; + + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); goto repeat; + } /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(head); goto repeat; } @@ -1380,7 +1422,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, rcu_read_lock(); radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) { - struct page *page; + struct page *head, *page; repeat: page = radix_tree_deref_slot(slot); /* The hole, there no reason to continue */ @@ -1400,12 +1442,19 @@ repeat: break; } - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) + goto repeat; + + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); goto repeat; + } /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(head); goto repeat; } @@ -1414,8 +1463,8 @@ repeat: * otherwise we can get both false positives and false * negatives, which is just confusing to the caller. */ - if (page->mapping == NULL || page->index != iter.index) { - page_cache_release(page); + if (page->mapping == NULL || page_to_pgoff(page) != iter.index) { + put_page(page); break; } @@ -1452,7 +1501,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, rcu_read_lock(); radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, *index, tag) { - struct page *page; + struct page *head, *page; repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) @@ -1477,12 +1526,19 @@ repeat: continue; } - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto repeat; + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); + goto repeat; + } + /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(head); goto repeat; } @@ -1526,7 +1582,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, rcu_read_lock(); radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start, tag) { - struct page *page; + struct page *head, *page; repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) @@ -1544,12 +1600,20 @@ repeat: */ goto export; } - if (!page_cache_get_speculative(page)) + + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto repeat; + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); + goto repeat; + } + /* Has the page moved? 
*/ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(head); goto repeat; } export: @@ -1610,11 +1674,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, unsigned int prev_offset; int error = 0; - index = *ppos >> PAGE_CACHE_SHIFT; - prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; - prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); - last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; - offset = *ppos & ~PAGE_CACHE_MASK; + index = *ppos >> PAGE_SHIFT; + prev_index = ra->prev_pos >> PAGE_SHIFT; + prev_offset = ra->prev_pos & (PAGE_SIZE-1); + last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; + offset = *ppos & ~PAGE_MASK; for (;;) { struct page *page; @@ -1648,7 +1712,7 @@ find_page: if (PageUptodate(page)) goto page_ok; - if (inode->i_blkbits == PAGE_CACHE_SHIFT || + if (inode->i_blkbits == PAGE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; if (!trylock_page(page)) @@ -1672,18 +1736,18 @@ page_ok: */ isize = i_size_read(inode); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + end_index = (isize - 1) >> PAGE_SHIFT; if (unlikely(!isize || index > end_index)) { - page_cache_release(page); + put_page(page); goto out; } /* nr is the maximum number of bytes to copy from this page */ - nr = PAGE_CACHE_SIZE; + nr = PAGE_SIZE; if (index == end_index) { - nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + nr = ((isize - 1) & ~PAGE_MASK) + 1; if (nr <= offset) { - page_cache_release(page); + put_page(page); goto out; } } @@ -1711,11 +1775,11 @@ page_ok: ret = copy_page_to_iter(page, offset, nr, iter); offset += ret; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; + index += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; prev_offset = offset; - page_cache_release(page); + put_page(page); written += ret; if (!iov_iter_count(iter)) goto out; @@ -1735,7 +1799,7 @@ page_not_up_to_date_locked: /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); continue; } @@ -1757,7 +1821,7 @@ readpage: if (unlikely(error)) { if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); + put_page(page); error = 0; goto find_page; } @@ -1774,7 +1838,7 @@ readpage: * invalidate_mapping_pages got it */ unlock_page(page); - page_cache_release(page); + put_page(page); goto find_page; } unlock_page(page); @@ -1789,7 +1853,7 @@ readpage: readpage_error: /* UHHUH! A synchronous read error occurred. Report it */ - page_cache_release(page); + put_page(page); goto out; no_cached_page: @@ -1805,7 +1869,7 @@ no_cached_page: error = add_to_page_cache_lru(page, mapping, index, mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error) { - page_cache_release(page); + put_page(page); if (error == -EEXIST) { error = 0; goto find_page; @@ -1817,10 +1881,10 @@ no_cached_page: out: ra->prev_pos = prev_index; - ra->prev_pos <<= PAGE_CACHE_SHIFT; + ra->prev_pos <<= PAGE_SHIFT; ra->prev_pos |= prev_offset; - *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; + *ppos = ((loff_t)index << PAGE_SHIFT) + offset; file_accessed(filp); return written ? 
written : error; } @@ -1838,8 +1902,6 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; ssize_t retval = 0; - loff_t *ppos = &iocb->ki_pos; - loff_t pos = *ppos; size_t count = iov_iter_count(iter); if (!count) @@ -1851,15 +1913,15 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) loff_t size; size = i_size_read(inode); - retval = filemap_write_and_wait_range(mapping, pos, - pos + count - 1); + retval = filemap_write_and_wait_range(mapping, iocb->ki_pos, + iocb->ki_pos + count - 1); if (!retval) { struct iov_iter data = *iter; - retval = mapping->a_ops->direct_IO(iocb, &data, pos); + retval = mapping->a_ops->direct_IO(iocb, &data); } if (retval > 0) { - *ppos = pos + retval; + iocb->ki_pos += retval; iov_iter_advance(iter, retval); } @@ -1872,14 +1934,14 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) * the rest of the read. Buffered reads will not work for * DAX files, so don't bother trying. */ - if (retval < 0 || !iov_iter_count(iter) || *ppos >= size || + if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size || IS_DAX(inode)) { file_accessed(file); goto out; } } - retval = do_generic_file_read(file, ppos, iter, retval); + retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval); out: return retval; } @@ -1912,7 +1974,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) else if (ret == -EEXIST) ret = 0; /* losing race to add is OK */ - page_cache_release(page); + put_page(page); } while (ret == AOP_TRUNCATED_PAGE); @@ -2022,8 +2084,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size; int ret = 0; - size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); - if (offset >= size >> PAGE_CACHE_SHIFT) + size = round_up(i_size_read(inode), PAGE_SIZE); + if (offset >= size >> PAGE_SHIFT) return VM_FAULT_SIGBUS; /* @@ -2049,7 +2111,7 @@ retry_find: } if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { - page_cache_release(page); + put_page(page); return ret | VM_FAULT_RETRY; } @@ -2072,10 +2134,10 @@ retry_find: * Found the page and have a reference on it. * We must recheck i_size under page lock. 
*/ - size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); - if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) { + size = round_up(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= size >> PAGE_SHIFT)) { unlock_page(page); - page_cache_release(page); + put_page(page); return VM_FAULT_SIGBUS; } @@ -2120,7 +2182,7 @@ page_not_uptodate: if (!PageUptodate(page)) error = -EIO; } - page_cache_release(page); + put_page(page); if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; @@ -2131,21 +2193,21 @@ page_not_uptodate: } EXPORT_SYMBOL(filemap_fault); -void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) +void filemap_map_pages(struct fault_env *fe, + pgoff_t start_pgoff, pgoff_t end_pgoff) { struct radix_tree_iter iter; void **slot; - struct file *file = vma->vm_file; + struct file *file = fe->vma->vm_file; struct address_space *mapping = file->f_mapping; + pgoff_t last_pgoff = start_pgoff; loff_t size; - struct page *page; - unsigned long address = (unsigned long) vmf->virtual_address; - unsigned long addr; - pte_t *pte; + struct page *head, *page; rcu_read_lock(); - radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) { - if (iter.index > vmf->max_pgoff) + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, + start_pgoff) { + if (iter.index > end_pgoff) break; repeat: page = radix_tree_deref_slot(slot); @@ -2159,12 +2221,19 @@ repeat: goto next; } - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto repeat; + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); + goto repeat; + } + /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(head); goto repeat; } @@ -2178,26 +2247,30 @@ repeat: if (page->mapping != mapping || !PageUptodate(page)) goto unlock; - size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE); - if (page->index >= size >> PAGE_CACHE_SHIFT) - goto unlock; - - pte = vmf->pte + page->index - vmf->pgoff; - if (!pte_none(*pte)) + size = round_up(i_size_read(mapping->host), PAGE_SIZE); + if (page->index >= size >> PAGE_SHIFT) goto unlock; if (file->f_ra.mmap_miss > 0) file->f_ra.mmap_miss--; - addr = address + (page->index - vmf->pgoff) * PAGE_SIZE; - do_set_pte(vma, addr, page, pte, false, false); + + fe->address += (iter.index - last_pgoff) << PAGE_SHIFT; + if (fe->pte) + fe->pte += iter.index - last_pgoff; + last_pgoff = iter.index; + if (alloc_set_pte(fe, NULL, page)) + goto unlock; unlock_page(page); goto next; unlock: unlock_page(page); skip: - page_cache_release(page); + put_page(page); next: - if (iter.index == vmf->max_pgoff) + /* Huge page is mapped? No need to proceed. 
*/ + if (pmd_trans_huge(*fe->pmd)) + break; + if (iter.index == end_pgoff) break; } rcu_read_unlock(); @@ -2278,7 +2351,7 @@ static struct page *wait_on_page_read(struct page *page) if (!IS_ERR(page)) { wait_on_page_locked(page); if (!PageUptodate(page)) { - page_cache_release(page); + put_page(page); page = ERR_PTR(-EIO); } } @@ -2301,7 +2374,7 @@ repeat: return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, gfp); if (unlikely(err)) { - page_cache_release(page); + put_page(page); if (err == -EEXIST) goto repeat; /* Presumably ENOMEM for radix tree node */ @@ -2311,7 +2384,7 @@ repeat: filler: err = filler(data, page); if (err < 0) { - page_cache_release(page); + put_page(page); return ERR_PTR(err); } @@ -2364,7 +2437,7 @@ filler: /* Case c or d, restart the operation */ if (!page->mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } @@ -2500,18 +2573,19 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(pagecache_write_end); ssize_t -generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + loff_t pos = iocb->ki_pos; ssize_t written; size_t write_len; pgoff_t end; struct iov_iter data; write_len = iov_iter_count(from); - end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; + end = (pos + write_len - 1) >> PAGE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); if (written) @@ -2525,7 +2599,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ if (mapping->nrpages) { written = invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); /* * If a page can not be invalidated, return 0 to fall back * to buffered write. @@ -2538,7 +2612,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) } data = *from; - written = mapping->a_ops->direct_IO(iocb, &data, pos); + written = mapping->a_ops->direct_IO(iocb, &data); /* * Finally, try again to invalidate clean pages which might have been @@ -2550,7 +2624,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); } if (written > 0) { @@ -2575,7 +2649,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index, unsigned flags) { struct page *page; - int fgp_flags = FGP_LOCK|FGP_ACCESSED|FGP_WRITE|FGP_CREAT; + int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT; if (flags & AOP_FLAG_NOFS) fgp_flags |= FGP_NOFS; @@ -2611,8 +2685,8 @@ ssize_t generic_perform_write(struct file *file, size_t copied; /* Bytes copied from user */ void *fsdata; - offset = (pos & (PAGE_CACHE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + offset = (pos & (PAGE_SIZE - 1)); + bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_count(i)); again: @@ -2665,7 +2739,7 @@ again: * because not all segments in the iov can be copied at * once without a pagefault. 
*/ - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_single_seg_count(i)); goto again; } @@ -2718,7 +2792,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos, endbyte; - written = generic_file_direct_write(iocb, from, iocb->ki_pos); + written = generic_file_direct_write(iocb, from); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to @@ -2752,8 +2826,8 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) iocb->ki_pos = endbyte + 1; written += status; invalidate_mapping_pages(mapping, - pos >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); } else { /* * We don't know how much we wrote, so just return @@ -2792,13 +2866,8 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = __generic_file_write_iter(iocb, from); inode_unlock(inode); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; - } + if (ret > 0) + ret = generic_write_sync(iocb, ret); return ret; } EXPORT_SYMBOL(generic_file_write_iter); |
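A note for readers skimming the patch: one of the central changes teaches the page cache bookkeeping about transparent huge pages, so page_cache_tree_delete() and __delete_from_page_cache() now adjust nrpages, NR_FILE_PAGES and NR_SHMEM by the number of subpages rather than by one. The sketch below is a plain userspace model of that accounting decision, not kernel code; the struct and its flags merely stand in for PageHuge(), PageTransHuge() and hpage_nr_pages(), and HPAGE_PMD_NR uses the x86-64 value.

#include <stdbool.h>
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* 2 MiB huge page / 4 KiB base page (x86-64) */

struct fake_page {
	bool is_hugetlb;	/* stands in for PageHuge() */
	bool is_trans_huge;	/* stands in for PageTransHuge() */
};

/*
 * Mirrors "nr = PageHuge(page) ? 1 : hpage_nr_pages(page)" from the patch:
 * a hugetlb page keeps a single page-cache entry, while a shmem THP
 * occupies HPAGE_PMD_NR consecutive slots that all need accounting.
 */
static int cache_slots(const struct fake_page *page)
{
	if (page->is_hugetlb)
		return 1;
	return page->is_trans_huge ? HPAGE_PMD_NR : 1;
}

int main(void)
{
	struct fake_page small = { false, false };
	struct fake_page thp = { false, true };
	struct fake_page huge = { true, false };

	printf("small page: %d slot(s)\n", cache_slots(&small));
	printf("shmem THP: %d slot(s)\n", cache_slots(&thp));
	printf("hugetlb page: %d slot(s)\n", cache_slots(&huge));
	return 0;
}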
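Another recurring, more mechanical change is the replacement of PAGE_CACHE_SHIFT/PAGE_CACHE_SIZE/PAGE_CACHE_MASK with the plain PAGE_* macros in the byte-offset arithmetic of do_generic_file_read() and the write paths. The snippet below is a minimal userspace illustration of that arithmetic, assuming 4 KiB pages instead of the kernel's architecture-provided constants; it mirrors the expressions *ppos >> PAGE_SHIFT, *ppos & ~PAGE_MASK and the last_index computation in the diff.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12			/* assume 4 KiB pages */
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t ppos = 123456789;	/* arbitrary byte position in a file */
	uint64_t count = 70000;		/* length of the read */

	uint64_t index = ppos >> PAGE_SHIFT;		/* first page cache index */
	uint64_t offset = ppos & ~PAGE_MASK;		/* offset within that page */
	uint64_t last_index = (ppos + count + PAGE_SIZE - 1) >> PAGE_SHIFT;

	printf("pos %llu -> index %llu, offset %llu, last_index %llu\n",
	       (unsigned long long)ppos, (unsigned long long)index,
	       (unsigned long long)offset, (unsigned long long)last_index);
	return 0;
}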