Diffstat (limited to 'mm/mlock.c')
 mm/mlock.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index d480cd6fc475..192e6eebe4f2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page)
 
 /**
  * munlock_vma_page - munlock a vma page
- * @page - page to be unlocked
+ * @page - page to be unlocked, either a normal page or THP page head
+ *
+ * returns the size of the page as a page mask (0 for normal page,
+ *         HPAGE_PMD_NR - 1 for THP head page)
  *
  * called from munlock()/munmap() path with page supposedly on the LRU.
  * When we munlock a page, because the vma where we found the page is being
@@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page)
  */
 unsigned int munlock_vma_page(struct page *page)
 {
-	unsigned int page_mask = 0;
+	unsigned int nr_pages;
 
 	BUG_ON(!PageLocked(page));
 
 	if (TestClearPageMlocked(page)) {
-		unsigned int nr_pages = hpage_nr_pages(page);
+		nr_pages = hpage_nr_pages(page);
 		mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
-		page_mask = nr_pages - 1;
 		if (!isolate_lru_page(page))
 			__munlock_isolated_page(page);
 		else
 			__munlock_isolation_failed(page);
+	} else {
+		nr_pages = hpage_nr_pages(page);
 	}
 
-	return page_mask;
+	/*
+	 * Regardless of the original PageMlocked flag, we determine nr_pages
+	 * after touching the flag. This leaves a possible race with a THP page
+	 * split, such that a whole THP page was munlocked, but nr_pages == 1.
+	 * Returning a smaller mask due to that is OK, the worst that can
+	 * happen is subsequent useless scanning of the former tail pages.
+	 * The NR_MLOCK accounting can however become broken.
+	 */
+	return nr_pages - 1;
 }
 
 /**
@@ -286,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
 	int i;
 	int nr = pagevec_count(pvec);
-	int delta_munlocked = -nr;
+	int delta_munlocked;
 	struct pagevec pvec_putback;
 	int pgrescued = 0;
 
+	pagevec_init(&pvec_putback, 0);
+
 	/* Phase 1: page isolation */
 	spin_lock_irq(&zone->lru_lock);
 	for (i = 0; i < nr; i++) {
@@ -318,18 +332,21 @@ skip_munlock:
 			/*
 			 * We won't be munlocking this page in the next phase
 			 * but we still need to release the follow_page_mask()
-			 * pin.
+			 * pin. We cannot do it under lru_lock however. If it's
+			 * the last pin, __page_cache_release would deadlock.
 			 */
+			pagevec_add(&pvec_putback, pvec->pages[i]);
 			pvec->pages[i] = NULL;
-			put_page(page);
-			delta_munlocked++;
 		}
 	}
+	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
 	spin_unlock_irq(&zone->lru_lock);
 
+	/* Now we can release pins of pages that we are not munlocking */
+	pagevec_release(&pvec_putback);
+
 	/* Phase 2: page munlock */
-	pagevec_init(&pvec_putback, 0);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pvec->pages[i];
 
@@ -440,7 +457,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 	while (start < end) {
 		struct page *page = NULL;
-		unsigned int page_mask, page_increm;
+		unsigned int page_mask;
+		unsigned long page_increm;
 		struct pagevec pvec;
 		struct zone *zone;
 		int zoneid;
 
@@ -490,7 +508,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 				goto next;
 			}
 		}
-		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+		/* It's a bug to munlock in the middle of a THP page */
+		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
+		page_increm = 1 + page_mask;
 		start += page_increm * PAGE_SIZE;
 next:
 		cond_resched();
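
A note on the mask arithmetic introduced above: munlock_vma_page() now always returns hpage_nr_pages() - 1, and munlock_vma_pages_range() advances by 1 + page_mask pages, relying on start being THP-aligned (hence the VM_BUG_ON). The following is a minimal userspace sketch of that arithmetic, not kernel code; the PAGE_SHIFT and HPAGE_PMD_NR values are illustrative assumptions (x86_64 with 2MB huge pages), not values taken from this patch.

/*
 * Minimal userspace sketch (not kernel code) of the mask arithmetic
 * used by munlock_vma_pages_range() above. PAGE_SHIFT and HPAGE_PMD_NR
 * are illustrative assumptions, not values from the patch.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define HPAGE_PMD_NR	512

int main(void)
{
	unsigned long start = 0x200000;			/* THP-aligned */
	unsigned long end = start + HPAGE_PMD_NR * PAGE_SIZE;
	unsigned int page_mask = HPAGE_PMD_NR - 1;	/* as returned for a THP head */
	unsigned long page_increm;

	/* It's a bug to munlock in the middle of a THP page */
	assert(((start >> PAGE_SHIFT) & page_mask) == 0);

	/* One munlock of the head covers the whole huge page */
	page_increm = 1 + page_mask;
	start += page_increm * PAGE_SIZE;

	printf("advanced to %#lx, end is %#lx\n", start, end);
	return start == end ? 0 : 1;
}

Run as-is this exits 0, confirming that a single munlock of an aligned THP head page advances the scan over exactly HPAGE_PMD_NR small pages.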

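The __munlock_pagevec() hunks implement a defer-then-release pattern: while lru_lock is held, pages whose follow_page_mask() pin must be dropped are only collected into pvec_putback, and the pins are released after the lock is dropped, since a final put could recurse into __page_cache_release() and self-deadlock. Below is a hedged userspace sketch of the same shape; struct obj, obj_put() and scan() are invented stand-ins for illustration, not kernel APIs.

/*
 * Userspace sketch (illustrative names, not kernel APIs) of the pattern
 * __munlock_pagevec() adopts above: while holding the lock, only record
 * the objects whose pin must be dropped; drop the pins after unlocking,
 * because a final put may recurse into teardown that takes the same lock.
 */
#include <pthread.h>
#include <stdlib.h>

struct obj {
	int refcount;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void obj_put(struct obj *o)
{
	/* In the analogy, the final put would take list_lock again,
	 * so it must never run with list_lock already held. */
	if (--o->refcount == 0)
		free(o);
}

static void scan(struct obj **objs, int nr)
{
	struct obj *putback[nr];	/* analogous to pvec_putback */
	int nr_putback = 0;
	int i;

	pthread_mutex_lock(&list_lock);
	for (i = 0; i < nr; i++) {
		if (objs[i] && objs[i]->refcount > 1) {
			/* Skip this one, but remember to drop its pin. */
			putback[nr_putback++] = objs[i];
			objs[i] = NULL;		/* phase 2 ignores it */
		}
	}
	pthread_mutex_unlock(&list_lock);

	/* Now the pins can be released without risk of self-deadlock. */
	for (i = 0; i < nr_putback; i++)
		obj_put(putback[i]);
}

int main(void)
{
	struct obj *a = malloc(sizeof(*a));
	struct obj *b = malloc(sizeof(*b));
	struct obj *objs[2];

	a->refcount = 2;	/* extra pin: gets put back */
	b->refcount = 1;	/* sole reference: left for phase 2 */
	objs[0] = a;
	objs[1] = b;

	scan(objs, 2);

	free(a);
	free(b);
	return 0;
}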