From e98337d11bbdfa3e3f0fb99aa93e40f97549e0cd Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Aug 2024 21:54:49 -0600 Subject: mm/contig_alloc: support __GFP_COMP Patch series "mm/hugetlb: alloc/free gigantic folios", v2. Using __GFP_COMP for gigantic folios can greatly reduce not only the amount of code but also the allocation and free time. Approximate LOC to mm/hugetlb.c: +60, -240 Allocate and free 500 1GB hugeTLB memory without HVO by: time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages Before After Alloc ~13s ~10s Free ~15s <1s The above magnitude generally holds for multiple x86 and arm64 CPU models. Perf profile before: Alloc - 99.99% alloc_pool_huge_folio - __alloc_fresh_hugetlb_folio - 83.23% alloc_contig_pages_noprof - 47.46% alloc_contig_range_noprof - 20.96% isolate_freepages_range 16.10% split_page - 14.10% start_isolate_page_range - 12.02% undo_isolate_page_range Free - update_and_free_pages_bulk - 87.71% free_contig_range - 76.02% free_unref_page - 41.30% free_unref_page_commit - 32.58% free_pcppages_bulk - 24.75% __free_one_page 13.96% _raw_spin_trylock 12.27% __update_and_free_hugetlb_folio Perf profile after: Alloc - 99.99% alloc_pool_huge_folio alloc_gigantic_folio - alloc_contig_pages_noprof - 59.15% alloc_contig_range_noprof - 20.72% start_isolate_page_range 20.64% prep_new_page - 17.13% undo_isolate_page_range Free - update_and_free_pages_bulk - __folio_put - __free_pages_ok 7.46% free_tail_page_prepare - 1.97% free_one_page 1.86% __free_one_page This patch (of 3): Support __GFP_COMP in alloc_contig_range(). When the flag is set, upon success the function returns a large folio prepared by prep_new_page(), rather than a range of order-0 pages prepared by split_free_pages() (which is renamed from split_map_pages()). alloc_contig_range() can be used to allocate folios larger than MAX_PAGE_ORDER, e.g., gigantic hugeTLB folios. 
So on the free path, free_one_page() needs to handle that by split_large_buddy(). [akpm@linux-foundation.org: fix folio_alloc_gigantic_noprof() WARN expression, per Yu Liao] Link: https://lkml.kernel.org/r/20240814035451.773331-1-yuzhao@google.com Link: https://lkml.kernel.org/r/20240814035451.773331-2-yuzhao@google.com Signed-off-by: Yu Zhao Acked-by: Zi Yan Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Frank van der Linden Signed-off-by: Andrew Morton --- mm/compaction.c | 41 +++++------------------------------------ 1 file changed, 5 insertions(+), 36 deletions(-) (limited to 'mm/compaction.c') diff --git a/mm/compaction.c b/mm/compaction.c index eb95e9b435d0..d1041fbce679 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -86,33 +86,6 @@ static struct page *mark_allocated_noprof(struct page *page, unsigned int order, } #define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__)) -static void split_map_pages(struct list_head *freepages) -{ - unsigned int i, order; - struct page *page, *next; - LIST_HEAD(tmp_list); - - for (order = 0; order < NR_PAGE_ORDERS; order++) { - list_for_each_entry_safe(page, next, &freepages[order], lru) { - unsigned int nr_pages; - - list_del(&page->lru); - - nr_pages = 1 << order; - - mark_allocated(page, order, __GFP_MOVABLE); - if (order) - split_page(page, order); - - for (i = 0; i < nr_pages; i++) { - list_add(&page->lru, &tmp_list); - page++; - } - } - list_splice_init(&tmp_list, &freepages[0]); - } -} - static unsigned long release_free_list(struct list_head *freepages) { int order; @@ -742,11 +715,11 @@ isolate_fail: * * Non-free pages, invalid PFNs, or zone boundaries within the * [start_pfn, end_pfn) range are considered errors, cause function to - * undo its actions and return zero. + * undo its actions and return zero. cc->freepages[] are empty. * * Otherwise, function returns one-past-the-last PFN of isolated page * (which may be greater then end_pfn if end fell in a middle of - * a free page). 
+ * a free page). cc->freepages[] contain free pages isolated. */ unsigned long isolate_freepages_range(struct compact_control *cc, @@ -754,10 +727,9 @@ isolate_freepages_range(struct compact_control *cc, { unsigned long isolated, pfn, block_start_pfn, block_end_pfn; int order; - struct list_head tmp_freepages[NR_PAGE_ORDERS]; for (order = 0; order < NR_PAGE_ORDERS; order++) - INIT_LIST_HEAD(&tmp_freepages[order]); + INIT_LIST_HEAD(&cc->freepages[order]); pfn = start_pfn; block_start_pfn = pageblock_start_pfn(pfn); @@ -788,7 +760,7 @@ isolate_freepages_range(struct compact_control *cc, break; isolated = isolate_freepages_block(cc, &isolate_start_pfn, - block_end_pfn, tmp_freepages, 0, true); + block_end_pfn, cc->freepages, 0, true); /* * In strict mode, isolate_freepages_block() returns 0 if @@ -807,13 +779,10 @@ isolate_freepages_range(struct compact_control *cc, if (pfn < end_pfn) { /* Loop terminated early, cleanup. */ - release_free_list(tmp_freepages); + release_free_list(cc->freepages); return 0; } - /* __isolate_free_page() does not map the pages */ - split_map_pages(tmp_freepages); - /* We don't use freelists for anything. */ return pfn; } -- cgit v1.2.3-70-g09d2