Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	219
1 file changed, 210 insertions, 9 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5cf99bf8cce2..aed085ad11a8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2948,15 +2948,6 @@ out_mutex:
 	return ret;
 }
 
-/* Can be overriden by architectures */
-__attribute__((weak)) struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-	       pud_t *pud, int write)
-{
-	BUG();
-	return NULL;
-}
-
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -3186,6 +3177,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vm_flags != svm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+	pte_t *pte;
+
+	if (!vma_shareable(vma, addr))
+		return (pte_t *)pmd_alloc(mm, pud, addr);
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud,
+				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+#define want_pmd_share()	(1)
+#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	return NULL;
+}
+#define want_pmd_share()	(0)
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		if (sz == PUD_SIZE) {
+			pte = (pte_t *)pud;
+		} else {
+			BUG_ON(sz != PMD_SIZE);
+			if (want_pmd_share() && pud_none(*pud))
+				pte = huge_pmd_share(mm, addr, pud);
+			else
+				pte = (pte_t *)pmd_alloc(mm, pud, addr);
+		}
+	}
+	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud)) {
+			if (pud_huge(*pud))
+				return (pte_t *)pud;
+			pmd = pmd_offset(pud, addr);
+		}
+	}
+	return (pte_t *) pmd;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
+/* Can be overriden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+	       pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
+#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
 #ifdef CONFIG_MEMORY_FAILURE
 /* Should be called in hugetlb_lock */
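The sharing path added above only triggers for mappings that pass vma_shareable(): a VM_MAYSHARE mapping whose VMA fully covers the PUD_SIZE-aligned range around the faulting address, plus at least one other mapping of the same file offsets that passes the page_table_shareable() checks. Below is a minimal userspace sketch (not part of the patch) of a workload with that shape, assuming x86_64 where huge pages are 2 MiB and one PMD page spans a 1 GiB PUD range, a hugetlbfs mount at /dev/hugepages with roughly 2 GiB of huge pages reserved, and a placeholder file name:

/*
 * Illustrative sketch only: two processes map the same hugetlbfs file
 * MAP_SHARED over a region large enough to contain a fully PUD-aligned
 * 1 GiB range, which is the shape vma_shareable() accepts.
 * Assumed: x86_64, 2 MiB huge pages, hugetlbfs mounted at /dev/hugepages.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define HPAGE_SIZE	(2UL << 20)	/* 2 MiB huge page */
#define PUD_SIZE	(1UL << 30)	/* range covered by one PMD page */

int main(void)
{
	/* Placeholder path: any file on a hugetlbfs mount will do. */
	int fd = open("/dev/hugepages/pmd-share-demo", O_CREAT | O_RDWR, 0600);
	if (fd < 0) { perror("open"); return 1; }

	/* Map two PUD_SIZE worth of huge pages so at least one fully
	 * PUD-aligned range lies inside the VMA whatever address mmap picks. */
	size_t len = 2 * PUD_SIZE;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) { perror("mmap"); return 1; }

	/* Touch every huge page so page tables get instantiated. */
	for (size_t off = 0; off < len; off += HPAGE_SIZE)
		p[off] = 1;

	if (fork() == 0) {
		/* Second process faults the same range of the same file and,
		 * subject to the checks above, can reuse the first process's
		 * PMD page instead of allocating its own. */
		for (size_t off = 0; off < len; off += HPAGE_SIZE)
			p[off] = 2;
		_exit(0);
	}
	wait(NULL);

	munmap(p, len);
	close(fd);
	unlink("/dev/hugepages/pmd-share-demo");
	return 0;
}

Mapping twice PUD_SIZE is what guarantees a fully aligned 1 GiB range inside the VMA, since hugetlbfs only aligns the mapping to the 2 MiB huge page size.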
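huge_pmd_unshare() tears down a whole shared range at once: after clearing the pud it rewinds *addr to the last huge page of the PUD range, so a caller that advances by HPAGE_SIZE resumes at the next PUD boundary instead of re-walking entries that no longer exist. A small standalone check of that arithmetic (illustration only, not kernel code), assuming the x86_64 constants of 2 MiB huge pages and 512 entries per page table page:

#include <stdio.h>

#define HPAGE_SIZE	(2UL << 20)
#define PTRS_PER_PTE	512UL
/* Round up to a power-of-two boundary, as the kernel's ALIGN() does. */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long pud_span = HPAGE_SIZE * PTRS_PER_PTE;	/* 1 GiB */
	unsigned long addr = 0x40000000UL + 5 * HPAGE_SIZE;	/* inside a PUD range */
	unsigned long rewound = ALIGN(addr, pud_span) - HPAGE_SIZE;

	printf("addr            %#lx\n", addr);
	printf("rewound         %#lx\n", rewound);
	printf("caller resumes  %#lx (the next PUD boundary)\n",
	       rewound + HPAGE_SIZE);
	return 0;
}

When *addr is already PUD-aligned the rewound value is one huge page before it, so such a caller revisits that address once, gets NULL back from huge_pte_offset() because the pud has been cleared, and simply moves on.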
