diff options
Diffstat (limited to 'tools/testing')
64 files changed, 5386 insertions, 1040 deletions
diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py index 6207be146d26..12a1d525978a 100644 --- a/tools/testing/kunit/qemu_configs/riscv.py +++ b/tools/testing/kunit/qemu_configs/riscv.py @@ -3,17 +3,13 @@ import os import os.path import sys -GITHUB_OPENSBI_URL = 'https://github.com/qemu/qemu/raw/master/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin' -OPENSBI_FILE = os.path.basename(GITHUB_OPENSBI_URL) +OPENSBI_FILE = 'opensbi-riscv64-generic-fw_dynamic.bin' +OPENSBI_PATH = '/usr/share/qemu/' + OPENSBI_FILE -if not os.path.isfile(OPENSBI_FILE): - print('\n\nOpenSBI file is not in the current working directory.\n' - 'Would you like me to download it for you from:\n' + GITHUB_OPENSBI_URL + ' ?\n') - response = input('yes/[no]: ') - if response.strip() == 'yes': - os.system('wget ' + GITHUB_OPENSBI_URL) - else: - sys.exit() +if not os.path.isfile(OPENSBI_PATH): + print('\n\nOpenSBI bios was not found in "' + OPENSBI_PATH + '".\n' + 'Please ensure that qemu-system-riscv is installed, or edit the path in "qemu_configs/riscv.py"\n') + sys.exit() QEMU_ARCH = QemuArchParams(linux_arch='riscv', kconfig=''' @@ -29,4 +25,4 @@ CONFIG_SERIAL_EARLYCON_RISCV_SBI=y''', extra_qemu_params=[ '-machine', 'virt', '-cpu', 'rv64', - '-bios', 'opensbi-riscv64-generic-fw_dynamic.bin']) + '-bios', OPENSBI_PATH]) diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h index 7c2eb5c9bb54..e65f89b12f1c 100644 --- a/tools/testing/memblock/linux/mmzone.h +++ b/tools/testing/memblock/linux/mmzone.h @@ -22,6 +22,8 @@ enum zone_type { #define pageblock_order (MAX_ORDER - 1) #define pageblock_nr_pages BIT(pageblock_order) +#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) +#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) struct zone { atomic_long_t managed_pages; diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include index aa6d82d56a23..998281723590 100644 --- a/tools/testing/memblock/scripts/Makefile.include +++ b/tools/testing/memblock/scripts/Makefile.include @@ -3,7 +3,7 @@ # Simulate CONFIG_NUMA=y ifeq ($(NUMA), 1) - CFLAGS += -D CONFIG_NUMA + CFLAGS += -D CONFIG_NUMA -D CONFIG_NODES_SHIFT=4 endif # Use 32 bit physical addresses. diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c index a14f38eb8a89..68f1a75cd72c 100644 --- a/tools/testing/memblock/tests/alloc_api.c +++ b/tools/testing/memblock/tests/alloc_api.c @@ -1,6 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "alloc_api.h" +static int alloc_test_flags = TEST_F_NONE; + +static inline const char * const get_memblock_alloc_name(int flags) +{ + if (flags & TEST_F_RAW) + return "memblock_alloc_raw"; + return "memblock_alloc"; +} + +static inline void *run_memblock_alloc(phys_addr_t size, phys_addr_t align) +{ + if (alloc_test_flags & TEST_F_RAW) + return memblock_alloc_raw(size, align); + return memblock_alloc(size, align); +} + /* * A simple test that tries to allocate a small memory region. * Expect to allocate an aligned region near the end of the available memory. @@ -9,19 +25,19 @@ static int alloc_top_down_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t size = SZ_2; phys_addr_t expected_start; + PREFIX_PUSH(); setup_memblock(); expected_start = memblock_end_of_DRAM() - SMP_CACHE_BYTES; - allocated_ptr = memblock_alloc(size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_test_flags); + ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, expected_start); @@ -58,15 +74,13 @@ static int alloc_top_down_disjoint_check(void) struct memblock_region *rgn2 = &memblock.reserved.regions[0]; struct region r1; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r2_size = SZ_16; /* Use custom alignment */ phys_addr_t alignment = SMP_CACHE_BYTES * 2; phys_addr_t total_size; phys_addr_t expected_start; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SZ_2; @@ -77,9 +91,11 @@ static int alloc_top_down_disjoint_check(void) memblock_reserve(r1.base, r1.size); - allocated_ptr = memblock_alloc(r2_size, alignment); + allocated_ptr = run_memblock_alloc(r2_size, alignment); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r2_size, alloc_test_flags); + ASSERT_EQ(rgn1->size, r1.size); ASSERT_EQ(rgn1->base, r1.base); @@ -108,9 +124,6 @@ static int alloc_top_down_before_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - /* * The first region ends at the aligned address to test region merging */ @@ -118,13 +131,16 @@ static int alloc_top_down_before_check(void) phys_addr_t r2_size = SZ_512; phys_addr_t total_size = r1_size + r2_size; + PREFIX_PUSH(); setup_memblock(); memblock_reserve(memblock_end_of_DRAM() - total_size, r1_size); - allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r2_size, alloc_test_flags); + ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - total_size); @@ -152,12 +168,10 @@ static int alloc_top_down_after_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; struct region r1; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r2_size = SZ_512; phys_addr_t total_size; + PREFIX_PUSH(); setup_memblock(); /* @@ -170,9 +184,11 @@ static int alloc_top_down_after_check(void) memblock_reserve(r1.base, r1.size); - allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r2_size, alloc_test_flags); + ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, r1.base - r2_size); @@ -201,12 +217,10 @@ static int alloc_top_down_second_fit_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; struct region r1, r2; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_1K; phys_addr_t total_size; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SZ_512; @@ -220,9 +234,11 @@ static int alloc_top_down_second_fit_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r3_size, alloc_test_flags); + ASSERT_EQ(rgn->size, r2.size + r3_size); ASSERT_EQ(rgn->base, r2.base - r3_size); @@ -250,9 +266,6 @@ static int alloc_in_between_generic_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; struct region r1, r2; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t r3_size = SZ_64; /* @@ -261,6 +274,7 @@ static int alloc_in_between_generic_check(void) phys_addr_t rgn_size = (MEM_SIZE - (2 * gap_size + r3_size)) / 2; phys_addr_t total_size; + PREFIX_PUSH(); setup_memblock(); r1.size = rgn_size; @@ -274,9 +288,11 @@ static int alloc_in_between_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r3_size, alloc_test_flags); + ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, r1.base - r2.size - r3_size); @@ -304,13 +320,11 @@ static int alloc_in_between_generic_check(void) static int alloc_small_gaps_generic_check(void) { void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t region_size = SZ_1K; phys_addr_t gap_size = SZ_256; phys_addr_t region_end; + PREFIX_PUSH(); setup_memblock(); region_end = memblock_start_of_DRAM(); @@ -320,7 +334,7 @@ static int alloc_small_gaps_generic_check(void) region_end += gap_size + region_size; } - allocated_ptr = memblock_alloc(region_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(region_size, SMP_CACHE_BYTES); ASSERT_EQ(allocated_ptr, NULL); @@ -338,13 +352,12 @@ static int alloc_all_reserved_generic_check(void) void *allocated_ptr = NULL; PREFIX_PUSH(); - setup_memblock(); /* Simulate full memory */ memblock_reserve(memblock_start_of_DRAM(), MEM_SIZE); - allocated_ptr = memblock_alloc(SZ_256, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(SZ_256, SMP_CACHE_BYTES); ASSERT_EQ(allocated_ptr, NULL); @@ -369,18 +382,16 @@ static int alloc_all_reserved_generic_check(void) static int alloc_no_space_generic_check(void) { void *allocated_ptr = NULL; + phys_addr_t available_size = SZ_256; + phys_addr_t reserved_size = MEM_SIZE - available_size; PREFIX_PUSH(); - setup_memblock(); - phys_addr_t available_size = SZ_256; - phys_addr_t reserved_size = MEM_SIZE - available_size; - /* Simulate almost-full memory */ memblock_reserve(memblock_start_of_DRAM(), reserved_size); - allocated_ptr = memblock_alloc(SZ_1K, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(SZ_1K, SMP_CACHE_BYTES); ASSERT_EQ(allocated_ptr, NULL); @@ -404,20 +415,20 @@ static int alloc_limited_space_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t available_size = SZ_256; phys_addr_t reserved_size = MEM_SIZE - available_size; + PREFIX_PUSH(); setup_memblock(); /* Simulate almost-full memory */ memblock_reserve(memblock_start_of_DRAM(), reserved_size); - allocated_ptr = memblock_alloc(available_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(available_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, available_size, alloc_test_flags); + ASSERT_EQ(rgn->size, MEM_SIZE); ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); @@ -443,7 +454,40 @@ static int alloc_no_memory_generic_check(void) reset_memblock_regions(); - allocated_ptr = memblock_alloc(SZ_1K, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(SZ_1K, SMP_CACHE_BYTES); + + ASSERT_EQ(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, 0); + ASSERT_EQ(rgn->base, 0); + ASSERT_EQ(memblock.reserved.total_size, 0); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a region that is larger than the total size of + * available memory (memblock.memory): + * + * +-----------------------------------+ + * | new | + * +-----------------------------------+ + * | | + * | | + * +---------------------------------+ + * + * Expect no allocation to happen. + */ +static int alloc_too_large_generic_check(void) +{ + struct memblock_region *rgn = &memblock.reserved.regions[0]; + void *allocated_ptr = NULL; + + PREFIX_PUSH(); + setup_memblock(); + + allocated_ptr = run_memblock_alloc(MEM_SIZE + SZ_2, SMP_CACHE_BYTES); ASSERT_EQ(allocated_ptr, NULL); ASSERT_EQ(rgn->size, 0); @@ -466,12 +510,13 @@ static int alloc_bottom_up_simple_check(void) void *allocated_ptr = NULL; PREFIX_PUSH(); - setup_memblock(); - allocated_ptr = memblock_alloc(SZ_2, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(SZ_2, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, SZ_2, alloc_test_flags); + ASSERT_EQ(rgn->size, SZ_2); ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); @@ -506,15 +551,13 @@ static int alloc_bottom_up_disjoint_check(void) struct memblock_region *rgn2 = &memblock.reserved.regions[1]; struct region r1; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r2_size = SZ_16; /* Use custom alignment */ phys_addr_t alignment = SMP_CACHE_BYTES * 2; phys_addr_t total_size; phys_addr_t expected_start; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_start_of_DRAM() + SZ_2; @@ -525,9 +568,10 @@ static int alloc_bottom_up_disjoint_check(void) memblock_reserve(r1.base, r1.size); - allocated_ptr = memblock_alloc(r2_size, alignment); + allocated_ptr = run_memblock_alloc(r2_size, alignment); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r2_size, alloc_test_flags); ASSERT_EQ(rgn1->size, r1.size); ASSERT_EQ(rgn1->base, r1.base); @@ -557,20 +601,20 @@ static int alloc_bottom_up_before_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r1_size = SZ_512; phys_addr_t r2_size = SZ_128; phys_addr_t total_size = r1_size + r2_size; + PREFIX_PUSH(); setup_memblock(); memblock_reserve(memblock_start_of_DRAM() + r1_size, r2_size); - allocated_ptr = memblock_alloc(r1_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(r1_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r1_size, alloc_test_flags); + ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); @@ -597,12 +641,10 @@ static int alloc_bottom_up_after_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; struct region r1; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r2_size = SZ_512; phys_addr_t total_size; + PREFIX_PUSH(); setup_memblock(); /* @@ -615,9 +657,11 @@ static int alloc_bottom_up_after_check(void) memblock_reserve(r1.base, r1.size); - allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r2_size, alloc_test_flags); + ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, r1.base); @@ -647,12 +691,10 @@ static int alloc_bottom_up_second_fit_check(void) struct memblock_region *rgn = &memblock.reserved.regions[1]; struct region r1, r2; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_1K; phys_addr_t total_size; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_start_of_DRAM(); @@ -666,9 +708,11 @@ static int alloc_bottom_up_second_fit_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES); + allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES); ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, r3_size, alloc_test_flags); + ASSERT_EQ(rgn->size, r2.size + r3_size); ASSERT_EQ(rgn->base, r2.base); @@ -728,10 +772,8 @@ static int alloc_after_check(void) static int alloc_in_between_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_in_between_generic_check(); - memblock_set_bottom_up(true); - alloc_in_between_generic_check(); + run_top_down(alloc_in_between_generic_check); + run_bottom_up(alloc_in_between_generic_check); return 0; } @@ -750,10 +792,8 @@ static int alloc_second_fit_check(void) static int alloc_small_gaps_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_small_gaps_generic_check(); - memblock_set_bottom_up(true); - alloc_small_gaps_generic_check(); + run_top_down(alloc_small_gaps_generic_check); + run_bottom_up(alloc_small_gaps_generic_check); return 0; } @@ -761,10 +801,8 @@ static int alloc_small_gaps_check(void) static int alloc_all_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_all_reserved_generic_check(); - memblock_set_bottom_up(true); - alloc_all_reserved_generic_check(); + run_top_down(alloc_all_reserved_generic_check); + run_bottom_up(alloc_all_reserved_generic_check); return 0; } @@ -772,10 +810,8 @@ static int alloc_all_reserved_check(void) static int alloc_no_space_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_no_space_generic_check(); - memblock_set_bottom_up(true); - alloc_no_space_generic_check(); + run_top_down(alloc_no_space_generic_check); + run_bottom_up(alloc_no_space_generic_check); return 0; } @@ -783,10 +819,8 @@ static int alloc_no_space_check(void) static int alloc_limited_space_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_limited_space_generic_check(); - memblock_set_bottom_up(true); - alloc_limited_space_generic_check(); + run_top_down(alloc_limited_space_generic_check); + run_bottom_up(alloc_limited_space_generic_check); return 0; } @@ -794,21 +828,29 @@ static int alloc_limited_space_check(void) static int alloc_no_memory_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_no_memory_generic_check(); - memblock_set_bottom_up(true); - alloc_no_memory_generic_check(); + run_top_down(alloc_no_memory_generic_check); + run_bottom_up(alloc_no_memory_generic_check); return 0; } -int memblock_alloc_checks(void) +static int alloc_too_large_check(void) { - const char *func_testing = "memblock_alloc"; + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_too_large_generic_check); + run_bottom_up(alloc_too_large_generic_check); + return 0; +} + +static int memblock_alloc_checks_internal(int flags) +{ + const char *func = get_memblock_alloc_name(flags); + + alloc_test_flags = flags; prefix_reset(); - prefix_push(func_testing); - test_print("Running %s tests...\n", func_testing); + prefix_push(func); + test_print("Running %s tests...\n", func); reset_memblock_attributes(); dummy_physical_memory_init(); @@ -824,6 +866,7 @@ int memblock_alloc_checks(void) alloc_no_space_check(); alloc_limited_space_check(); alloc_no_memory_check(); + alloc_too_large_check(); dummy_physical_memory_cleanup(); @@ -831,3 +874,11 @@ int memblock_alloc_checks(void) return 0; } + +int memblock_alloc_checks(void) +{ + memblock_alloc_checks_internal(TEST_F_NONE); + memblock_alloc_checks_internal(TEST_F_RAW); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c index 1069b4bdd5fd..3ef9486da8a0 100644 --- a/tools/testing/memblock/tests/alloc_helpers_api.c +++ b/tools/testing/memblock/tests/alloc_helpers_api.c @@ -19,22 +19,18 @@ static int alloc_from_simple_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_16; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES; allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr); - b = (char *)allocated_ptr; ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + ASSERT_MEM_EQ(allocated_ptr, 0, size); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, min_addr); @@ -66,23 +62,19 @@ static int alloc_from_misaligned_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_32; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); /* A misaligned address */ min_addr = memblock_end_of_DRAM() - (SMP_CACHE_BYTES * 2 - 1); allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr); - b = (char *)allocated_ptr; ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + ASSERT_MEM_EQ(allocated_ptr, 0, size); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - SMP_CACHE_BYTES); @@ -117,12 +109,10 @@ static int alloc_from_top_down_high_addr_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t size = SZ_32; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); /* The address is too close to the end of the memory */ @@ -162,14 +152,12 @@ static int alloc_from_top_down_no_space_above_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r1_size = SZ_64; phys_addr_t r2_size = SZ_2; phys_addr_t total_size = r1_size + r2_size; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2; @@ -201,13 +189,11 @@ static int alloc_from_top_down_min_addr_cap_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r1_size = SZ_64; phys_addr_t min_addr; phys_addr_t start_addr; + PREFIX_PUSH(); setup_memblock(); start_addr = (phys_addr_t)memblock_start_of_DRAM(); @@ -249,12 +235,10 @@ static int alloc_from_bottom_up_high_addr_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t size = SZ_32; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); /* The address is too close to the end of the memory */ @@ -293,13 +277,11 @@ static int alloc_from_bottom_up_no_space_above_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r1_size = SZ_64; phys_addr_t min_addr; phys_addr_t r2_size; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SZ_128; @@ -331,13 +313,11 @@ static int alloc_from_bottom_up_min_addr_cap_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t r1_size = SZ_64; phys_addr_t min_addr; phys_addr_t start_addr; + PREFIX_PUSH(); setup_memblock(); start_addr = (phys_addr_t)memblock_start_of_DRAM(); @@ -361,10 +341,8 @@ static int alloc_from_bottom_up_min_addr_cap_check(void) static int alloc_from_simple_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_from_simple_generic_check(); - memblock_set_bottom_up(true); - alloc_from_simple_generic_check(); + run_top_down(alloc_from_simple_generic_check); + run_bottom_up(alloc_from_simple_generic_check); return 0; } @@ -372,10 +350,8 @@ static int alloc_from_simple_check(void) static int alloc_from_misaligned_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_from_misaligned_generic_check(); - memblock_set_bottom_up(true); - alloc_from_misaligned_generic_check(); + run_top_down(alloc_from_misaligned_generic_check); + run_bottom_up(alloc_from_misaligned_generic_check); return 0; } diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c index 255fd514e9f5..2c2d60f4e3e3 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.c +++ b/tools/testing/memblock/tests/alloc_nid_api.c @@ -1,6 +1,41 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "alloc_nid_api.h" +static int alloc_nid_test_flags = TEST_F_NONE; + +/* + * contains the fraction of MEM_SIZE contained in each node in basis point + * units (one hundredth of 1% or 1/10000) + */ +static const unsigned int node_fractions[] = { + 2500, /* 1/4 */ + 625, /* 1/16 */ + 1250, /* 1/8 */ + 1250, /* 1/8 */ + 625, /* 1/16 */ + 625, /* 1/16 */ + 2500, /* 1/4 */ + 625, /* 1/16 */ +}; + +static inline const char * const get_memblock_alloc_try_nid_name(int flags) +{ + if (flags & TEST_F_RAW) + return "memblock_alloc_try_nid_raw"; + return "memblock_alloc_try_nid"; +} + +static inline void *run_memblock_alloc_try_nid(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr, + phys_addr_t max_addr, int nid) +{ + if (alloc_nid_test_flags & TEST_F_RAW) + return memblock_alloc_try_nid_raw(size, align, min_addr, + max_addr, nid); + return memblock_alloc_try_nid(size, align, min_addr, max_addr, nid); +} + /* * A simple test that tries to allocate a memory region within min_addr and * max_addr range: @@ -13,33 +48,30 @@ * | | * min_addr max_addr * - * Expect to allocate a cleared region that ends at max_addr. + * Expect to allocate a region that ends at max_addr. */ static int alloc_try_nid_top_down_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_128; phys_addr_t min_addr; phys_addr_t max_addr; phys_addr_t rgn_end; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, max_addr - size); @@ -68,34 +100,31 @@ static int alloc_try_nid_top_down_simple_check(void) * Aligned address * boundary * - * Expect to allocate a cleared, aligned region that ends before max_addr. + * Expect to allocate an aligned region that ends before max_addr. */ static int alloc_try_nid_top_down_end_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_128; phys_addr_t misalign = SZ_2; phys_addr_t min_addr; phys_addr_t max_addr; phys_addr_t rgn_end; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512 + misalign; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, max_addr - size - misalign); @@ -121,34 +150,31 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) * | | * min_addr max_addr * - * Expect to allocate a cleared region that starts at min_addr and ends at + * Expect to allocate a region that starts at min_addr and ends at * max_addr, given that min_addr is aligned. */ static int alloc_try_nid_exact_address_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; phys_addr_t rgn_end; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES; max_addr = min_addr + size; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, min_addr); @@ -176,32 +202,29 @@ static int alloc_try_nid_exact_address_generic_check(void) * address | * boundary min_add * - * Expect to drop the lower limit and allocate a cleared memory region which + * Expect to drop the lower limit and allocate a memory region which * ends at max_addr (if the address is aligned). */ static int alloc_try_nid_top_down_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, max_addr - size); @@ -237,20 +260,19 @@ static int alloc_try_nid_top_down_narrow_range_check(void) static int alloc_try_nid_low_max_generic_check(void) { void *allocated_ptr = NULL; - - PREFIX_PUSH(); - phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM(); max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -277,10 +299,6 @@ static int alloc_try_nid_min_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t r1_size = SZ_128; phys_addr_t r2_size = SZ_64; phys_addr_t total_size = r1_size + r2_size; @@ -288,6 +306,7 @@ static int alloc_try_nid_min_reserved_generic_check(void) phys_addr_t max_addr; phys_addr_t reserved_base; + PREFIX_PUSH(); setup_memblock(); max_addr = memblock_end_of_DRAM(); @@ -296,12 +315,12 @@ static int alloc_try_nid_min_reserved_generic_check(void) memblock_reserve(reserved_base, r1_size); - allocated_ptr = memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, reserved_base); @@ -332,16 +351,13 @@ static int alloc_try_nid_max_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t r1_size = SZ_64; phys_addr_t r2_size = SZ_128; phys_addr_t total_size = r1_size + r2_size; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); max_addr = memblock_end_of_DRAM() - r1_size; @@ -349,12 +365,12 @@ static int alloc_try_nid_max_reserved_generic_check(void) memblock_reserve(max_addr, r1_size); - allocated_ptr = memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, min_addr); @@ -389,17 +405,14 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; struct region r1, r2; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_64; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; phys_addr_t max_addr; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2; @@ -415,12 +428,12 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); ASSERT_EQ(rgn1->size, r1.size + r3_size); ASSERT_EQ(rgn1->base, max_addr - r3_size); @@ -456,16 +469,13 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; struct region r1, r2; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_64; phys_addr_t total_size; phys_addr_t max_addr; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2; @@ -481,12 +491,12 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, total_size); ASSERT_EQ(rgn->base, r2.base); @@ -522,17 +532,14 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; struct region r1, r2; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_256; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; phys_addr_t max_addr; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2; @@ -548,12 +555,12 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); ASSERT_EQ(rgn1->size, r1.size); ASSERT_EQ(rgn1->base, r1.base); @@ -593,14 +600,12 @@ static int alloc_try_nid_reserved_all_generic_check(void) { void *allocated_ptr = NULL; struct region r1, r2; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_256; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t max_addr; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES; @@ -615,8 +620,9 @@ static int alloc_try_nid_reserved_all_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -628,31 +634,28 @@ static int alloc_try_nid_reserved_all_generic_check(void) /* * A test that tries to allocate a memory region, where max_addr is * bigger than the end address of the available memory. Expect to allocate - * a cleared region that ends before the end of the memory. + * a region that ends before the end of the memory. */ static int alloc_try_nid_top_down_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_end_of_DRAM() - SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - size); @@ -668,31 +671,28 @@ static int alloc_try_nid_top_down_cap_max_check(void) /* * A test that tries to allocate a memory region, where min_addr is * smaller than the start address of the available memory. Expect to allocate - * a cleared region that ends before the end of the memory. + * a region that ends before the end of the memory. */ static int alloc_try_nid_top_down_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() - SZ_256; max_addr = memblock_end_of_DRAM(); - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - size); @@ -717,34 +717,30 @@ static int alloc_try_nid_top_down_cap_min_check(void) * | | * min_addr max_addr * - * Expect to allocate a cleared region that ends before max_addr. + * Expect to allocate a region that ends before max_addr. */ static int alloc_try_nid_bottom_up_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_128; phys_addr_t min_addr; phys_addr_t max_addr; phys_addr_t rgn_end; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, min_addr); @@ -773,35 +769,31 @@ static int alloc_try_nid_bottom_up_simple_check(void) * Aligned address * boundary * - * Expect to allocate a cleared, aligned region that ends before max_addr. + * Expect to allocate an aligned region that ends before max_addr. */ static int alloc_try_nid_bottom_up_start_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_128; phys_addr_t misalign = SZ_2; phys_addr_t min_addr; phys_addr_t max_addr; phys_addr_t rgn_end; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + misalign; max_addr = min_addr + SZ_512; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, min_addr + (SMP_CACHE_BYTES - misalign)); @@ -829,33 +821,29 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) * | * min_add * - * Expect to drop the lower limit and allocate a cleared memory region which + * Expect to drop the lower limit and allocate a memory region which * starts at the beginning of the available memory. */ static int alloc_try_nid_bottom_up_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); @@ -890,17 +878,14 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; struct region r1, r2; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_64; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; phys_addr_t max_addr; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2; @@ -916,13 +901,12 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); ASSERT_EQ(rgn1->size, r1.size); ASSERT_EQ(rgn1->base, max_addr); @@ -964,17 +948,14 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) struct memblock_region *rgn2 = &memblock.reserved.regions[1]; struct memblock_region *rgn3 = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; struct region r1, r2; - - PREFIX_PUSH(); - phys_addr_t r3_size = SZ_256; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; phys_addr_t max_addr; phys_addr_t min_addr; + PREFIX_PUSH(); setup_memblock(); r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2; @@ -990,13 +971,12 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); ASSERT_EQ(rgn3->size, r3_size); ASSERT_EQ(rgn3->base, memblock_start_of_DRAM()); @@ -1018,32 +998,28 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) /* * A test that tries to allocate a memory region, where max_addr is * bigger than the end address of the available memory. Expect to allocate - * a cleared region that starts at the min_addr + * a region that starts at the min_addr. */ static int alloc_try_nid_bottom_up_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM() + SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, min_addr); @@ -1059,32 +1035,28 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) /* * A test that tries to allocate a memory region, where min_addr is * smaller than the start address of the available memory. Expect to allocate - * a cleared region at the beginning of the available memory. + * a region at the beginning of the available memory. */ static int alloc_try_nid_bottom_up_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; - char *b; - - PREFIX_PUSH(); - phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; + PREFIX_PUSH(); setup_memblock(); min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM() - SZ_256; - allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); - b = (char *)allocated_ptr; + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); - ASSERT_EQ(*b, 0); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); ASSERT_EQ(rgn->size, size); ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); @@ -1097,7 +1069,7 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) return 0; } -/* Test case wrappers */ +/* Test case wrappers for range tests */ static int alloc_try_nid_simple_check(void) { test_print("\tRunning %s...\n", __func__); @@ -1178,10 +1150,8 @@ static int alloc_try_nid_cap_min_check(void) static int alloc_try_nid_min_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_try_nid_min_reserved_generic_check(); - memblock_set_bottom_up(true); - alloc_try_nid_min_reserved_generic_check(); + run_top_down(alloc_try_nid_min_reserved_generic_check); + run_bottom_up(alloc_try_nid_min_reserved_generic_check); return 0; } @@ -1189,10 +1159,8 @@ static int alloc_try_nid_min_reserved_check(void) static int alloc_try_nid_max_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_try_nid_max_reserved_generic_check(); - memblock_set_bottom_up(true); - alloc_try_nid_max_reserved_generic_check(); + run_top_down(alloc_try_nid_max_reserved_generic_check); + run_bottom_up(alloc_try_nid_max_reserved_generic_check); return 0; } @@ -1200,10 +1168,8 @@ static int alloc_try_nid_max_reserved_check(void) static int alloc_try_nid_exact_address_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_try_nid_exact_address_generic_check(); - memblock_set_bottom_up(true); - alloc_try_nid_exact_address_generic_check(); + run_top_down(alloc_try_nid_exact_address_generic_check); + run_bottom_up(alloc_try_nid_exact_address_generic_check); return 0; } @@ -1211,10 +1177,8 @@ static int alloc_try_nid_exact_address_check(void) static int alloc_try_nid_reserved_full_merge_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_try_nid_reserved_full_merge_generic_check(); - memblock_set_bottom_up(true); - alloc_try_nid_reserved_full_merge_generic_check(); + run_top_down(alloc_try_nid_reserved_full_merge_generic_check); + run_bottom_up(alloc_try_nid_reserved_full_merge_generic_check); return 0; } @@ -1222,10 +1186,8 @@ static int alloc_try_nid_reserved_full_merge_check(void) static int alloc_try_nid_reserved_all_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_try_nid_reserved_all_generic_check(); - memblock_set_bottom_up(true); - alloc_try_nid_reserved_all_generic_check(); + run_top_down(alloc_try_nid_reserved_all_generic_check); + run_bottom_up(alloc_try_nid_reserved_all_generic_check); return 0; } @@ -1233,24 +1195,16 @@ static int alloc_try_nid_reserved_all_check(void) static int alloc_try_nid_low_max_check(void) { test_print("\tRunning %s...\n", __func__); - memblock_set_bottom_up(false); - alloc_try_nid_low_max_generic_check(); - memblock_set_bottom_up(true); - alloc_try_nid_low_max_generic_check(); + run_top_down(alloc_try_nid_low_max_generic_check); + run_bottom_up(alloc_try_nid_low_max_generic_check); return 0; } -int memblock_alloc_nid_checks(void) +static int memblock_alloc_nid_range_checks(void) { - const char *func_testing = "memblock_alloc_try_nid"; - - prefix_reset(); - prefix_push(func_testing); - test_print("Running %s tests...\n", func_testing); - - reset_memblock_attributes(); - dummy_physical_memory_init(); + test_print("Running %s range tests...\n", + get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); alloc_try_nid_simple_check(); alloc_try_nid_misaligned_check(); @@ -1267,9 +1221,1453 @@ int memblock_alloc_nid_checks(void) alloc_try_nid_reserved_all_check(); alloc_try_nid_low_max_check(); + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the end of the requested node. + */ +static int alloc_try_nid_top_down_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size: + * + * | +-----+ +------------------+ | + * | | req | | expected | | + * +---+-----+----------+------------------+-----+ + * + * | +---------+ | + * | | rgn | | + * +-----------------------------+---------+-----+ + * + * Expect to allocate an aligned region at the end of the last node that has + * enough memory (in this case, nid = 6) after falling back to NUMA_NO_NODE. + */ +static int alloc_try_nid_top_down_numa_small_node_check(void) +{ + int nid_req = 1; + int nid_exp = 6; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_2 * req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(exp_node) - size); + ASSERT_LE(exp_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is fully reserved: + * + * | +---------+ +------------------+ | + * | |requested| | expected | | + * +--------------+---------+------------+------------------+-----+ + * + * | +---------+ +---------+ | + * | | reserved| | new | | + * +--------------+---------+---------------------+---------+-----+ + * + * Expect to allocate an aligned region at the end of the last node that is + * large enough and has enough unreserved memory (in this case, nid = 6) after + * falling back to NUMA_NO_NODE. The region count and total size get updated. + */ +static int alloc_try_nid_top_down_numa_node_reserved_check(void) +{ + int nid_req = 2; + int nid_exp = 6; + struct memblock_region *new_rgn = &memblock.reserved.regions[1]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(req_node->base, req_node->size); + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(exp_node) - size); + ASSERT_LE(exp_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + req_node->size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+----------+ + * + * | +------------------+ +-----+ | + * | | reserved | | new | | + * +-----------+------------------+--------------+-----+----------+ + * + * Expect to allocate an aligned region at the end of the requested node. The + * region count and total size get updated. + */ +static int alloc_try_nid_top_down_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[1]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + r1.size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved and does not have enough contiguous memory for the + * allocated region: + * + * | +-----------------------+ +----------------------| + * | | requested | | expected | + * +-----------+-----------------------+---------+----------------------+ + * + * | +----------+ +-----------| + * | | reserved | | new | + * +-----------------+----------+---------------------------+-----------+ + * + * Expect to allocate an aligned region at the end of the last node that is + * large enough and has enough unreserved memory (in this case, + * nid = NUMA_NODES - 1) after falling back to NUMA_NO_NODE. The region count + * and total size get updated. + */ +static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) +{ + int nid_req = 4; + int nid_exp = NUMA_NODES - 1; + struct memblock_region *new_rgn = &memblock.reserved.regions[1]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_2; + r1.base = req_node->base + (size / SZ_2); + r1.size = size; + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(exp_node) - size); + ASSERT_LE(exp_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + r1.size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------------------+-----------+--------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_try_nid_top_down_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node_end - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the second + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +--------------------------+---------+ | + * | | expected |requested| | + * +------+--------------------------+---------+----------------+ + * + + + * | +---------+ | + * | | rgn | | + * +-----------------------+---------+--------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that + * ends at the end of the first node that overlaps with the range. + */ +static int alloc_try_nid_top_down_numa_split_range_high_check(void) +{ + int nid_req = 3; + int nid_exp = nid_req - 1; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t exp_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + exp_node_end = region_end(exp_node); + min_addr = exp_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, exp_node_end - size); + ASSERT_LE(exp_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+----------+ + * + + + * | +---------+ | + * | | rgn | | + * +----------+---------+-------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * | +-----+ | + * | | rgn | | + * +---------------------------------------------------+-----+------+ + * + * Expect to allocate a memory region at the end of the final node in + * the range after falling back to NUMA_NO_NODE. + */ +static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, max_addr - size); + ASSERT_LE(max_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node starts + * after max_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * | +----------+----...----+----------+ +-----------+ | + * | | min node | ... | max node | | requested | | + * +-----+----------+----...----+----------+--------+-----------+---+ + * + + + * | +-----+ | + * | | rgn | | + * +---------------------------------+-----+------------------------+ + * + * Expect to allocate a memory region at the end of the final node in + * the range after falling back to NUMA_NO_NODE. + */ +static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) +{ + int nid_req = 7; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, max_addr - size); + ASSERT_LE(max_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the beginning of the requested node. + */ +static int alloc_try_nid_bottom_up_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size: + * + * |----------------------+-----+ | + * | expected | req | | + * +----------------------+-----+----------------+ + * + * |---------+ | + * | rgn | | + * +---------+-----------------------------------+ + * + * Expect to allocate an aligned region at the beginning of the first node that + * has enough memory (in this case, nid = 0) after falling back to NUMA_NO_NODE. + */ +static int alloc_try_nid_bottom_up_numa_small_node_check(void) +{ + int nid_req = 1; + int nid_exp = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_2 * req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, exp_node->base); + ASSERT_LE(region_end(new_rgn), region_end(exp_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is fully reserved: + * + * |----------------------+ +-----------+ | + * | expected | | requested | | + * +----------------------+-----+-----------+--------------------+ + * + * |-----------+ +-----------+ | + * | new | | reserved | | + * +-----------+----------------+-----------+--------------------+ + * + * Expect to allocate an aligned region at the beginning of the first node that + * is large enough and has enough unreserved memory (in this case, nid = 0) + * after falling back to NUMA_NO_NODE. The region count and total size get + * updated. + */ +static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) +{ + int nid_req = 2; + int nid_exp = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(req_node->base, req_node->size); + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, exp_node->base); + ASSERT_LE(region_end(new_rgn), region_end(exp_node)); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + req_node->size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+---------+ + * + * | +------------------+-----+ | + * | | reserved | new | | + * +-----------+------------------+-----+------------------------+ + * + * Expect to allocate an aligned region in the requested node that merges with + * the existing reserved region. The total size gets updated. + */ +static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t total_size; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + total_size = size + r1.size; + + memblock_reserve(r1.base, r1.size); + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved and does not have enough contiguous memory for the + * allocated region: + * + * |----------------------+ +-----------------------+ | + * | expected | | requested | | + * +----------------------+-------+-----------------------+---------+ + * + * |-----------+ +----------+ | + * | new | | reserved | | + * +-----------+------------------------+----------+----------------+ + * + * Expect to allocate an aligned region at the beginning of the first + * node that is large enough and has enough unreserved memory (in this case, + * nid = 0) after falling back to NUMA_NO_NODE. The region count and total size + * get updated. + */ +static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) +{ + int nid_req = 4; + int nid_exp = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_2; + r1.base = req_node->base + (size / SZ_2); + r1.size = size; + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, exp_node->base); + ASSERT_LE(region_end(new_rgn), region_end(exp_node)); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + r1.size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------+-----------+--------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region at the beginning + * of the requested node. + */ +static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), req_node_end); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the second + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * |------------------+ +----------------------+---------+ | + * | expected | | previous |requested| | + * +------------------+--------+----------------------+---------+------+ + * + + + * |---------+ | + * | rgn | | + * +---------+---------------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region at the beginning + * of the first node that has enough memory. + */ +static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) +{ + int nid_req = 3; + int nid_exp = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *exp_node = &memblock.memory.regions[nid_exp]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t exp_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + exp_node_end = region_end(req_node); + min_addr = req_node->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, exp_node->base); + ASSERT_LE(region_end(new_rgn), exp_node_end); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+---------+ + * + + + * | +---------+ | + * | | rgn | | + * +----+---------+------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that starts at + * the beginning of the requested node. + */ +static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * | +-----+ | + * | | rgn | | + * +-----------------------+-----+----------------------------------+ + * + * Expect to allocate a memory region at the beginning of the first node + * in the range after falling back to NUMA_NO_NODE. + */ +static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, min_addr); + ASSERT_LE(region_end(new_rgn), region_end(min_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node starts + * after max_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * | +----------+----...----+----------+ +---------+ | + * | | min node | ... | max node | |requested| | + * +-----+----------+----...----+----------+---------+---------+---+ + * + + + * | +-----+ | + * | | rgn | | + * +-----+-----+---------------------------------------------------+ + * + * Expect to allocate a memory region at the beginning of the first node + * in the range after falling back to NUMA_NO_NODE. + */ +static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) +{ + int nid_req = 7; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, min_addr); + ASSERT_LE(region_end(new_rgn), region_end(min_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size. + * Additionally, none of the nodes have enough memory to allocate the region: + * + * +-----------------------------------+ + * | new | + * +-----------------------------------+ + * |-------+-------+-------+-------+-------+-------+-------+-------| + * | node0 | node1 | node2 | node3 | node4 | node5 | node6 | node7 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Expect no allocation to happen. + */ +static int alloc_try_nid_numa_large_region_generic_check(void) +{ + int nid_req = 3; + void *allocated_ptr = NULL; + phys_addr_t size = MEM_SIZE / SZ_2; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_addr range when + * there are two reserved regions at the borders. The requested node starts at + * min_addr and ends at max_addr and is the same size as the region to be + * allocated: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------+-----------------------+-----------------------| + * | | node5 | requested | node7 | + * +------+-----------+-----------------------+-----------------------+ + * + + + * | +----+-----------------------+----+ | + * | | r2 | new | r1 | | + * +-------------+----+-----------------------+----+------------------+ + * + * Expect to merge all of the regions into one. The region counter and total + * size fields get updated. + */ +static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) +{ + int nid_req = 6; + int nid_next = nid_req + 1; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *next_node = &memblock.memory.regions[nid_next]; + void *allocated_ptr = NULL; + struct region r1, r2; + phys_addr_t size = req_node->size; + phys_addr_t total_size; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r1.base = next_node->base; + r1.size = SZ_128; + + r2.size = SZ_128; + r2.base = r1.base - (size + r2.size); + + total_size = r1.size + r2.size + size; + min_addr = r2.base + r2.size; + max_addr = r1.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); + + ASSERT_NE(allocated_ptr, NULL); + assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, r2.base); + + ASSERT_LE(new_rgn->base, req_node->base); + ASSERT_LE(region_end(req_node), region_end(new_rgn)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range, + * where the total range can fit the region, but it is split between two nodes + * and everything else is reserved. Additionally, nid is set to NUMA_NO_NODE + * instead of requesting a specific node: + * + * +-----------+ + * | new | + * +-----------+ + * | +---------------------+-----------| + * | | prev node | next node | + * +------+---------------------+-----------+ + * + + + * |----------------------+ +-----| + * | r1 | | r2 | + * +----------------------+-----------+-----+ + * ^ ^ + * | | + * | max_addr + * | + * min_addr + * + * Expect no allocation to happen. + */ +static int alloc_try_nid_numa_split_all_reserved_generic_check(void) +{ + void *allocated_ptr = NULL; + struct memblock_region *next_node = &memblock.memory.regions[7]; + struct region r1, r2; + phys_addr_t size = SZ_256; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r2.base = next_node->base + SZ_128; + r2.size = memblock_end_of_DRAM() - r2.base; + + r1.size = MEM_SIZE - (r2.size + size); + r1.base = memblock_start_of_DRAM(); + + min_addr = r1.base + r1.size; + max_addr = r2.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* Test case wrappers for NUMA tests */ +static int alloc_try_nid_numa_simple_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_simple_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_simple_check(); + + return 0; +} + +static int alloc_try_nid_numa_small_node_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_small_node_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_small_node_check(); + + return 0; +} + +static int alloc_try_nid_numa_node_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_node_reserved_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_node_reserved_check(); + + return 0; +} + +static int alloc_try_nid_numa_part_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_part_reserved_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_part_reserved_check(); + + return 0; +} + +static int alloc_try_nid_numa_part_reserved_fallback_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_part_reserved_fallback_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(); + + return 0; +} + +static int alloc_try_nid_numa_split_range_low_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_split_range_low_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_split_range_low_check(); + + return 0; +} + +static int alloc_try_nid_numa_split_range_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_split_range_high_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_split_range_high_check(); + + return 0; +} + +static int alloc_try_nid_numa_no_overlap_split_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_no_overlap_split_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_no_overlap_split_check(); + + return 0; +} + +static int alloc_try_nid_numa_no_overlap_low_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_no_overlap_low_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_no_overlap_low_check(); + + return 0; +} + +static int alloc_try_nid_numa_no_overlap_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_try_nid_top_down_numa_no_overlap_high_check(); + memblock_set_bottom_up(true); + alloc_try_nid_bottom_up_numa_no_overlap_high_check(); + + return 0; +} + +static int alloc_try_nid_numa_large_region_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_try_nid_numa_large_region_generic_check); + run_bottom_up(alloc_try_nid_numa_large_region_generic_check); + + return 0; +} + +static int alloc_try_nid_numa_reserved_full_merge_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_try_nid_numa_reserved_full_merge_generic_check); + run_bottom_up(alloc_try_nid_numa_reserved_full_merge_generic_check); + + return 0; +} + +static int alloc_try_nid_numa_split_all_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_try_nid_numa_split_all_reserved_generic_check); + run_bottom_up(alloc_try_nid_numa_split_all_reserved_generic_check); + + return 0; +} + +int __memblock_alloc_nid_numa_checks(void) +{ + test_print("Running %s NUMA tests...\n", + get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); + + alloc_try_nid_numa_simple_check(); + alloc_try_nid_numa_small_node_check(); + alloc_try_nid_numa_node_reserved_check(); + alloc_try_nid_numa_part_reserved_check(); + alloc_try_nid_numa_part_reserved_fallback_check(); + alloc_try_nid_numa_split_range_low_check(); + alloc_try_nid_numa_split_range_high_check(); + + alloc_try_nid_numa_no_overlap_split_check(); + alloc_try_nid_numa_no_overlap_low_check(); + alloc_try_nid_numa_no_overlap_high_check(); + alloc_try_nid_numa_large_region_check(); + alloc_try_nid_numa_reserved_full_merge_check(); + alloc_try_nid_numa_split_all_reserved_check(); + + return 0; +} + +static int memblock_alloc_nid_checks_internal(int flags) +{ + alloc_nid_test_flags = flags; + + prefix_reset(); + prefix_push(get_memblock_alloc_try_nid_name(flags)); + + reset_memblock_attributes(); + dummy_physical_memory_init(); + + memblock_alloc_nid_range_checks(); + memblock_alloc_nid_numa_checks(); + dummy_physical_memory_cleanup(); prefix_pop(); return 0; } + +int memblock_alloc_nid_checks(void) +{ + memblock_alloc_nid_checks_internal(TEST_F_NONE); + memblock_alloc_nid_checks_internal(TEST_F_RAW); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_nid_api.h b/tools/testing/memblock/tests/alloc_nid_api.h index b35cf3c3f489..92d07d230e18 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.h +++ b/tools/testing/memblock/tests/alloc_nid_api.h @@ -5,5 +5,21 @@ #include "common.h" int memblock_alloc_nid_checks(void); +int __memblock_alloc_nid_numa_checks(void); + +#ifdef CONFIG_NUMA +static inline int memblock_alloc_nid_numa_checks(void) +{ + __memblock_alloc_nid_numa_checks(); + return 0; +} + +#else +static inline int memblock_alloc_nid_numa_checks(void) +{ + return 0; +} + +#endif /* CONFIG_NUMA */ #endif diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 66f46f261e66..a13a57ba0815 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -8,6 +8,7 @@ #define FUNC_RESERVE "memblock_reserve" #define FUNC_REMOVE "memblock_remove" #define FUNC_FREE "memblock_free" +#define FUNC_TRIM "memblock_trim_memory" static int memblock_initialization_check(void) { @@ -326,6 +327,102 @@ static int memblock_add_twice_check(void) return 0; } +/* + * A test that tries to add two memory blocks that don't overlap with one + * another and then add a third memory block in the space between the first two: + * + * | +--------+--------+--------+ | + * | | r1 | r3 | r2 | | + * +--------+--------+--------+--------+--+ + * + * Expect to merge the three entries into one region that starts at r1.base + * and has size of r1.size + r2.size + r3.size. The region counter and total + * size of the available memory are updated. + */ +static int memblock_add_between_check(void) +{ + struct memblock_region *rgn; + phys_addr_t total_size; + + rgn = &memblock.memory.regions[0]; + + struct region r1 = { + .base = SZ_1G, + .size = SZ_8K + }; + struct region r2 = { + .base = SZ_1G + SZ_16K, + .size = SZ_8K + }; + struct region r3 = { + .base = SZ_1G + SZ_8K, + .size = SZ_8K + }; + + PREFIX_PUSH(); + + total_size = r1.size + r2.size + r3.size; + + reset_memblock_regions(); + memblock_add(r1.base, r1.size); + memblock_add(r2.base, r2.size); + memblock_add(r3.base, r3.size); + + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A simple test that tries to add a memory block r when r extends past + * PHYS_ADDR_MAX: + * + * +--------+ + * | r | + * +--------+ + * | +----+ + * | | rgn| + * +----------------------------+----+ + * + * Expect to add a memory block of size PHYS_ADDR_MAX - r.base. Expect the + * total size of available memory and the counter to be updated. + */ +static int memblock_add_near_max_check(void) +{ + struct memblock_region *rgn; + phys_addr_t total_size; + + rgn = &memblock.memory.regions[0]; + + struct region r = { + .base = PHYS_ADDR_MAX - SZ_1M, + .size = SZ_2M + }; + + PREFIX_PUSH(); + + total_size = PHYS_ADDR_MAX - r.base; + + reset_memblock_regions(); + memblock_add(r.base, r.size); + + ASSERT_EQ(rgn->base, r.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, total_size); + + test_pass_pop(); + + return 0; +} + static int memblock_add_checks(void) { prefix_reset(); @@ -339,6 +436,8 @@ static int memblock_add_checks(void) memblock_add_overlap_bottom_check(); memblock_add_within_check(); memblock_add_twice_check(); + memblock_add_between_check(); + memblock_add_near_max_check(); prefix_pop(); @@ -604,6 +703,102 @@ static int memblock_reserve_twice_check(void) return 0; } +/* + * A test that tries to mark two memory blocks that don't overlap as reserved + * and then reserve a third memory block in the space between the first two: + * + * | +--------+--------+--------+ | + * | | r1 | r3 | r2 | | + * +--------+--------+--------+--------+--+ + * + * Expect to merge the three entries into one reserved region that starts at + * r1.base and has size of r1.size + r2.size + r3.size. The region counter and + * total for memblock.reserved are updated. + */ +static int memblock_reserve_between_check(void) +{ + struct memblock_region *rgn; + phys_addr_t total_size; + + rgn = &memblock.reserved.regions[0]; + + struct region r1 = { + .base = SZ_1G, + .size = SZ_8K + }; + struct region r2 = { + .base = SZ_1G + SZ_16K, + .size = SZ_8K + }; + struct region r3 = { + .base = SZ_1G + SZ_8K, + .size = SZ_8K + }; + + PREFIX_PUSH(); + + total_size = r1.size + r2.size + r3.size; + + reset_memblock_regions(); + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + memblock_reserve(r3.base, r3.size); + + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A simple test that tries to reserve a memory block r when r extends past + * PHYS_ADDR_MAX: + * + * +--------+ + * | r | + * +--------+ + * | +----+ + * | | rgn| + * +----------------------------+----+ + * + * Expect to reserve a memory block of size PHYS_ADDR_MAX - r.base. Expect the + * total size of reserved memory and the counter to be updated. + */ +static int memblock_reserve_near_max_check(void) +{ + struct memblock_region *rgn; + phys_addr_t total_size; + + rgn = &memblock.reserved.regions[0]; + + struct region r = { + .base = PHYS_ADDR_MAX - SZ_1M, + .size = SZ_2M + }; + + PREFIX_PUSH(); + + total_size = PHYS_ADDR_MAX - r.base; + + reset_memblock_regions(); + memblock_reserve(r.base, r.size); + + ASSERT_EQ(rgn->base, r.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + static int memblock_reserve_checks(void) { prefix_reset(); @@ -616,6 +811,8 @@ static int memblock_reserve_checks(void) memblock_reserve_overlap_bottom_check(); memblock_reserve_within_check(); memblock_reserve_twice_check(); + memblock_reserve_between_check(); + memblock_reserve_near_max_check(); prefix_pop(); @@ -887,6 +1084,155 @@ static int memblock_remove_within_check(void) return 0; } +/* + * A simple test that tries to remove a region r1 from the array of + * available memory regions when r1 is the only available region. + * Expect to add a memory block r1 and then remove r1 so that a dummy + * region is added. The region counter stays the same, and the total size + * is updated. + */ +static int memblock_remove_only_region_check(void) +{ + struct memblock_region *rgn; + + rgn = &memblock.memory.regions[0]; + + struct region r1 = { + .base = SZ_2K, + .size = SZ_4K + }; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_add(r1.base, r1.size); + memblock_remove(r1.base, r1.size); + + ASSERT_EQ(rgn->base, 0); + ASSERT_EQ(rgn->size, 0); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, 0); + + test_pass_pop(); + + return 0; +} + +/* + * A simple test that tries remove a region r2 from the array of available + * memory regions when r2 extends past PHYS_ADDR_MAX: + * + * +--------+ + * | r2 | + * +--------+ + * | +---+....+ + * | |rgn| | + * +------------------------+---+----+ + * + * Expect that only the portion between PHYS_ADDR_MAX and r2.base is removed. + * Expect the total size of available memory to be updated and the counter to + * not be updated. + */ +static int memblock_remove_near_max_check(void) +{ + struct memblock_region *rgn; + phys_addr_t total_size; + + rgn = &memblock.memory.regions[0]; + + struct region r1 = { + .base = PHYS_ADDR_MAX - SZ_2M, + .size = SZ_2M + }; + + struct region r2 = { + .base = PHYS_ADDR_MAX - SZ_1M, + .size = SZ_2M + }; + + PREFIX_PUSH(); + + total_size = r1.size - (PHYS_ADDR_MAX - r2.base); + + reset_memblock_regions(); + memblock_add(r1.base, r1.size); + memblock_remove(r2.base, r2.size); + + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to remove a region r3 that overlaps with two existing + * regions r1 and r2: + * + * +----------------+ + * | r3 | + * +----------------+ + * | +----+..... ........+--------+ + * | | |r1 : : |r2 | | + * +----+----+----+---+-------+--------+-----+ + * + * Expect that only the intersections of r1 with r3 and r2 with r3 are removed + * from the available memory pool. Expect the total size of available memory to + * be updated and the counter to not be updated. + */ +static int memblock_remove_overlap_two_check(void) +{ + struct memblock_region *rgn1, *rgn2; + phys_addr_t new_r1_size, new_r2_size, r2_end, r3_end, total_size; + + rgn1 = &memblock.memory.regions[0]; + rgn2 = &memblock.memory.regions[1]; + + struct region r1 = { + .base = SZ_16M, + .size = SZ_32M + }; + struct region r2 = { + .base = SZ_64M, + .size = SZ_64M + }; + struct region r3 = { + .base = SZ_32M, + .size = SZ_64M + }; + + PREFIX_PUSH(); + + r2_end = r2.base + r2.size; + r3_end = r3.base + r3.size; + new_r1_size = r3.base - r1.base; + new_r2_size = r2_end - r3_end; + total_size = new_r1_size + new_r2_size; + + reset_memblock_regions(); + memblock_add(r1.base, r1.size); + memblock_add(r2.base, r2.size); + memblock_remove(r3.base, r3.size); + + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, new_r1_size); + + ASSERT_EQ(rgn2->base, r3_end); + ASSERT_EQ(rgn2->size, new_r2_size); + + ASSERT_EQ(memblock.memory.cnt, 2); + ASSERT_EQ(memblock.memory.total_size, total_size); + + test_pass_pop(); + + return 0; +} + static int memblock_remove_checks(void) { prefix_reset(); @@ -898,6 +1244,9 @@ static int memblock_remove_checks(void) memblock_remove_overlap_top_check(); memblock_remove_overlap_bottom_check(); memblock_remove_within_check(); + memblock_remove_only_region_check(); + memblock_remove_near_max_check(); + memblock_remove_overlap_two_check(); prefix_pop(); @@ -1163,6 +1512,154 @@ static int memblock_free_within_check(void) return 0; } +/* + * A simple test that tries to free a memory block r1 that was marked + * earlier as reserved when r1 is the only available region. + * Expect to reserve a memory block r1 and then free r1 so that r1 is + * overwritten with a dummy region. The region counter stays the same, + * and the total size is updated. + */ +static int memblock_free_only_region_check(void) +{ + struct memblock_region *rgn; + + rgn = &memblock.reserved.regions[0]; + + struct region r1 = { + .base = SZ_2K, + .size = SZ_4K + }; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_reserve(r1.base, r1.size); + memblock_free((void *)r1.base, r1.size); + + ASSERT_EQ(rgn->base, 0); + ASSERT_EQ(rgn->size, 0); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, 0); + + test_pass_pop(); + + return 0; +} + +/* + * A simple test that tries free a region r2 when r2 extends past PHYS_ADDR_MAX: + * + * +--------+ + * | r2 | + * +--------+ + * | +---+....+ + * | |rgn| | + * +------------------------+---+----+ + * + * Expect that only the portion between PHYS_ADDR_MAX and r2.base is freed. + * Expect the total size of reserved memory to be updated and the counter to + * not be updated. + */ +static int memblock_free_near_max_check(void) +{ + struct memblock_region *rgn; + phys_addr_t total_size; + + rgn = &memblock.reserved.regions[0]; + + struct region r1 = { + .base = PHYS_ADDR_MAX - SZ_2M, + .size = SZ_2M + }; + + struct region r2 = { + .base = PHYS_ADDR_MAX - SZ_1M, + .size = SZ_2M + }; + + PREFIX_PUSH(); + + total_size = r1.size - (PHYS_ADDR_MAX - r2.base); + + reset_memblock_regions(); + memblock_reserve(r1.base, r1.size); + memblock_free((void *)r2.base, r2.size); + + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to free a reserved region r3 that overlaps with two + * existing reserved regions r1 and r2: + * + * +----------------+ + * | r3 | + * +----------------+ + * | +----+..... ........+--------+ + * | | |r1 : : |r2 | | + * +----+----+----+---+-------+--------+-----+ + * + * Expect that only the intersections of r1 with r3 and r2 with r3 are freed + * from the collection of reserved memory. Expect the total size of reserved + * memory to be updated and the counter to not be updated. + */ +static int memblock_free_overlap_two_check(void) +{ + struct memblock_region *rgn1, *rgn2; + phys_addr_t new_r1_size, new_r2_size, r2_end, r3_end, total_size; + + rgn1 = &memblock.reserved.regions[0]; + rgn2 = &memblock.reserved.regions[1]; + + struct region r1 = { + .base = SZ_16M, + .size = SZ_32M + }; + struct region r2 = { + .base = SZ_64M, + .size = SZ_64M + }; + struct region r3 = { + .base = SZ_32M, + .size = SZ_64M + }; + + PREFIX_PUSH(); + + r2_end = r2.base + r2.size; + r3_end = r3.base + r3.size; + new_r1_size = r3.base - r1.base; + new_r2_size = r2_end - r3_end; + total_size = new_r1_size + new_r2_size; + + reset_memblock_regions(); + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + memblock_free((void *)r3.base, r3.size); + + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, new_r1_size); + + ASSERT_EQ(rgn2->base, r3_end); + ASSERT_EQ(rgn2->size, new_r2_size); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + static int memblock_free_checks(void) { prefix_reset(); @@ -1174,6 +1671,274 @@ static int memblock_free_checks(void) memblock_free_overlap_top_check(); memblock_free_overlap_bottom_check(); memblock_free_within_check(); + memblock_free_only_region_check(); + memblock_free_near_max_check(); + memblock_free_overlap_two_check(); + + prefix_pop(); + + return 0; +} + +static int memblock_set_bottom_up_check(void) +{ + prefix_push("memblock_set_bottom_up"); + + memblock_set_bottom_up(false); + ASSERT_EQ(memblock.bottom_up, false); + memblock_set_bottom_up(true); + ASSERT_EQ(memblock.bottom_up, true); + + reset_memblock_attributes(); + test_pass_pop(); + + return 0; +} + +static int memblock_bottom_up_check(void) +{ + prefix_push("memblock_bottom_up"); + + memblock_set_bottom_up(false); + ASSERT_EQ(memblock_bottom_up(), memblock.bottom_up); + ASSERT_EQ(memblock_bottom_up(), false); + memblock_set_bottom_up(true); + ASSERT_EQ(memblock_bottom_up(), memblock.bottom_up); + ASSERT_EQ(memblock_bottom_up(), true); + + reset_memblock_attributes(); + test_pass_pop(); + + return 0; +} + +static int memblock_bottom_up_checks(void) +{ + test_print("Running memblock_*bottom_up tests...\n"); + + prefix_reset(); + memblock_set_bottom_up_check(); + prefix_reset(); + memblock_bottom_up_check(); + + return 0; +} + +/* + * A test that tries to trim memory when both ends of the memory region are + * aligned. Expect that the memory will not be trimmed. Expect the counter to + * not be updated. + */ +static int memblock_trim_memory_aligned_check(void) +{ + struct memblock_region *rgn; + const phys_addr_t alignment = SMP_CACHE_BYTES; + + rgn = &memblock.memory.regions[0]; + + struct region r = { + .base = alignment, + .size = alignment * 4 + }; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_add(r.base, r.size); + memblock_trim_memory(alignment); + + ASSERT_EQ(rgn->base, r.base); + ASSERT_EQ(rgn->size, r.size); + + ASSERT_EQ(memblock.memory.cnt, 1); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to trim memory when there are two available regions, r1 and + * r2. Region r1 is aligned on both ends and region r2 is unaligned on one end + * and smaller than the alignment: + * + * alignment + * |--------| + * | +-----------------+ +------+ | + * | | r1 | | r2 | | + * +--------+-----------------+--------+------+---+ + * ^ ^ ^ ^ ^ + * |________|________|________| | + * | Unaligned address + * Aligned addresses + * + * Expect that r1 will not be trimmed and r2 will be removed. Expect the + * counter to be updated. + */ +static int memblock_trim_memory_too_small_check(void) +{ + struct memblock_region *rgn; + const phys_addr_t alignment = SMP_CACHE_BYTES; + + rgn = &memblock.memory.regions[0]; + + struct region r1 = { + .base = alignment, + .size = alignment * 2 + }; + struct region r2 = { + .base = alignment * 4, + .size = alignment - SZ_2 + }; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_add(r1.base, r1.size); + memblock_add(r2.base, r2.size); + memblock_trim_memory(alignment); + + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, r1.size); + + ASSERT_EQ(memblock.memory.cnt, 1); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to trim memory when there are two available regions, r1 and + * r2. Region r1 is aligned on both ends and region r2 is unaligned at the base + * and aligned at the end: + * + * Unaligned address + * | + * v + * | +-----------------+ +---------------+ | + * | | r1 | | r2 | | + * +--------+-----------------+----------+---------------+---+ + * ^ ^ ^ ^ ^ ^ + * |________|________|________|________|________| + * | + * Aligned addresses + * + * Expect that r1 will not be trimmed and r2 will be trimmed at the base. + * Expect the counter to not be updated. + */ +static int memblock_trim_memory_unaligned_base_check(void) +{ + struct memblock_region *rgn1, *rgn2; + const phys_addr_t alignment = SMP_CACHE_BYTES; + phys_addr_t offset = SZ_2; + phys_addr_t new_r2_base, new_r2_size; + + rgn1 = &memblock.memory.regions[0]; + rgn2 = &memblock.memory.regions[1]; + + struct region r1 = { + .base = alignment, + .size = alignment * 2 + }; + struct region r2 = { + .base = alignment * 4 + offset, + .size = alignment * 2 - offset + }; + + PREFIX_PUSH(); + + new_r2_base = r2.base + (alignment - offset); + new_r2_size = r2.size - (alignment - offset); + + reset_memblock_regions(); + memblock_add(r1.base, r1.size); + memblock_add(r2.base, r2.size); + memblock_trim_memory(alignment); + + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, r1.size); + + ASSERT_EQ(rgn2->base, new_r2_base); + ASSERT_EQ(rgn2->size, new_r2_size); + + ASSERT_EQ(memblock.memory.cnt, 2); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to trim memory when there are two available regions, r1 and + * r2. Region r1 is aligned on both ends and region r2 is aligned at the base + * and unaligned at the end: + * + * Unaligned address + * | + * v + * | +-----------------+ +---------------+ | + * | | r1 | | r2 | | + * +--------+-----------------+--------+---------------+---+ + * ^ ^ ^ ^ ^ ^ + * |________|________|________|________|________| + * | + * Aligned addresses + * + * Expect that r1 will not be trimmed and r2 will be trimmed at the end. + * Expect the counter to not be updated. + */ +static int memblock_trim_memory_unaligned_end_check(void) +{ + struct memblock_region *rgn1, *rgn2; + const phys_addr_t alignment = SMP_CACHE_BYTES; + phys_addr_t offset = SZ_2; + phys_addr_t new_r2_size; + + rgn1 = &memblock.memory.regions[0]; + rgn2 = &memblock.memory.regions[1]; + + struct region r1 = { + .base = alignment, + .size = alignment * 2 + }; + struct region r2 = { + .base = alignment * 4, + .size = alignment * 2 - offset + }; + + PREFIX_PUSH(); + + new_r2_size = r2.size - (alignment - offset); + + reset_memblock_regions(); + memblock_add(r1.base, r1.size); + memblock_add(r2.base, r2.size); + memblock_trim_memory(alignment); + + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, r1.size); + + ASSERT_EQ(rgn2->base, r2.base); + ASSERT_EQ(rgn2->size, new_r2_size); + + ASSERT_EQ(memblock.memory.cnt, 2); + + test_pass_pop(); + + return 0; +} + +static int memblock_trim_memory_checks(void) +{ + prefix_reset(); + prefix_push(FUNC_TRIM); + test_print("Running %s tests...\n", FUNC_TRIM); + + memblock_trim_memory_aligned_check(); + memblock_trim_memory_too_small_check(); + memblock_trim_memory_unaligned_base_check(); + memblock_trim_memory_unaligned_end_check(); prefix_pop(); @@ -1187,6 +1952,8 @@ int memblock_basic_checks(void) memblock_reserve_checks(); memblock_remove_checks(); memblock_free_checks(); + memblock_bottom_up_checks(); + memblock_trim_memory_checks(); return 0; } diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index e43b2676af81..3f795047bbe1 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -9,19 +9,22 @@ #define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS #define PREFIXES_MAX 15 #define DELIM ": " +#define BASIS 10000 static struct test_memory memory_block; static const char __maybe_unused *prefixes[PREFIXES_MAX]; static int __maybe_unused nr_prefixes; -static const char *short_opts = "mv"; +static const char *short_opts = "hmv"; static const struct option long_opts[] = { + {"help", 0, NULL, 'h'}, {"movable-node", 0, NULL, 'm'}, {"verbose", 0, NULL, 'v'}, {NULL, 0, NULL, 0} }; static const char * const help_opts[] = { + "display this help message and exit", "disallow allocations from regions marked as hotplugged\n\t\t\t" "by simulating enabling the \"movable_node\" kernel\n\t\t\t" "parameter", @@ -58,16 +61,53 @@ void reset_memblock_attributes(void) memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; } +static inline void fill_memblock(void) +{ + memset(memory_block.base, 1, MEM_SIZE); +} + void setup_memblock(void) { reset_memblock_regions(); memblock_add((phys_addr_t)memory_block.base, MEM_SIZE); + fill_memblock(); +} + +/** + * setup_numa_memblock: + * Set up a memory layout with multiple NUMA nodes in a previously allocated + * dummy physical memory. + * @node_fracs: an array representing the fraction of MEM_SIZE contained in + * each node in basis point units (one hundredth of 1% or 1/10000). + * For example, if node 0 should contain 1/8 of MEM_SIZE, + * node_fracs[0] = 1250. + * + * The nids will be set to 0 through NUMA_NODES - 1. + */ +void setup_numa_memblock(const unsigned int node_fracs[]) +{ + phys_addr_t base; + int flags; + + reset_memblock_regions(); + base = (phys_addr_t)memory_block.base; + flags = (movable_node_is_enabled()) ? MEMBLOCK_NONE : MEMBLOCK_HOTPLUG; + + for (int i = 0; i < NUMA_NODES; i++) { + assert(node_fracs[i] <= BASIS); + phys_addr_t size = MEM_SIZE * node_fracs[i] / BASIS; + + memblock_add_node(base, size, i, flags); + base += size; + } + fill_memblock(); } void dummy_physical_memory_init(void) { memory_block.base = malloc(MEM_SIZE); assert(memory_block.base); + fill_memblock(); } void dummy_physical_memory_cleanup(void) diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index 3e7f23d341d7..d6bbbe63bfc3 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -10,13 +10,22 @@ #include <linux/printk.h> #include <../selftests/kselftest.h> -#define MEM_SIZE SZ_16K +#define MEM_SIZE SZ_16K +#define NUMA_NODES 8 + +enum test_flags { + /* No special request. */ + TEST_F_NONE = 0x0, + /* Perform raw allocations (no zeroing of memory). */ + TEST_F_RAW = 0x1, +}; /** * ASSERT_EQ(): * Check the condition * @_expected == @_seen - * If false, print failed test message (if in VERBOSE mode) and then assert + * If false, print failed test message (if running with --verbose) and then + * assert. */ #define ASSERT_EQ(_expected, _seen) do { \ if ((_expected) != (_seen)) \ @@ -28,7 +37,8 @@ * ASSERT_NE(): * Check the condition * @_expected != @_seen - * If false, print failed test message (if in VERBOSE mode) and then assert + * If false, print failed test message (if running with --verbose) and then + * assert. */ #define ASSERT_NE(_expected, _seen) do { \ if ((_expected) == (_seen)) \ @@ -40,7 +50,8 @@ * ASSERT_LT(): * Check the condition * @_expected < @_seen - * If false, print failed test message (if in VERBOSE mode) and then assert + * If false, print failed test message (if running with --verbose) and then + * assert. */ #define ASSERT_LT(_expected, _seen) do { \ if ((_expected) >= (_seen)) \ @@ -48,6 +59,43 @@ assert((_expected) < (_seen)); \ } while (0) +/** + * ASSERT_LE(): + * Check the condition + * @_expected <= @_seen + * If false, print failed test message (if running with --verbose) and then + * assert. + */ +#define ASSERT_LE(_expected, _seen) do { \ + if ((_expected) > (_seen)) \ + test_fail(); \ + assert((_expected) <= (_seen)); \ +} while (0) + +/** + * ASSERT_MEM_EQ(): + * Check that the first @_size bytes of @_seen are all equal to @_expected. + * If false, print failed test message (if running with --verbose) and then + * assert. + */ +#define ASSERT_MEM_EQ(_seen, _expected, _size) do { \ + for (int _i = 0; _i < (_size); _i++) { \ + ASSERT_EQ(((char *)_seen)[_i], (_expected)); \ + } \ +} while (0) + +/** + * ASSERT_MEM_NE(): + * Check that none of the first @_size bytes of @_seen are equal to @_expected. + * If false, print failed test message (if running with --verbose) and then + * assert. + */ +#define ASSERT_MEM_NE(_seen, _expected, _size) do { \ + for (int _i = 0; _i < (_size); _i++) { \ + ASSERT_NE(((char *)_seen)[_i], (_expected)); \ + } \ +} while (0) + #define PREFIX_PUSH() prefix_push(__func__) /* @@ -65,9 +113,15 @@ struct region { phys_addr_t size; }; +static inline phys_addr_t __maybe_unused region_end(struct memblock_region *rgn) +{ + return rgn->base + rgn->size; +} + void reset_memblock_regions(void); void reset_memblock_attributes(void); void setup_memblock(void); +void setup_numa_memblock(const unsigned int node_fracs[]); void dummy_physical_memory_init(void); void dummy_physical_memory_cleanup(void); void parse_args(int argc, char **argv); @@ -85,4 +139,28 @@ static inline void test_pass_pop(void) prefix_pop(); } +static inline void run_top_down(int (*func)()) +{ + memblock_set_bottom_up(false); + prefix_push("top-down"); + func(); + prefix_pop(); +} + +static inline void run_bottom_up(int (*func)()) +{ + memblock_set_bottom_up(true); + prefix_push("bottom-up"); + func(); + prefix_pop(); +} + +static inline void assert_mem_content(void *mem, int size, int flags) +{ + if (flags & TEST_F_RAW) + ASSERT_MEM_NE(mem, 0, size); + else + ASSERT_MEM_EQ(mem, 0, size); +} + #endif diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index d971516401e6..c901d96dd013 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -6,3 +6,5 @@ main multiorder radix-tree.c xarray +maple +ma_xa_benchmark diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index c4ea4fbb0bfc..89d613e0505b 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -4,9 +4,9 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \ -fsanitize=undefined LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu -TARGETS = main idr-test multiorder xarray +TARGETS = main idr-test multiorder xarray maple CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \ - slab.o + slab.o maple.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \ iteration_check_2.o benchmark.o @@ -29,6 +29,8 @@ idr-test: idr-test.o $(CORE_OFILES) xarray: $(CORE_OFILES) +maple: $(CORE_OFILES) + multiorder: multiorder.o $(CORE_OFILES) clean: @@ -40,6 +42,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ ../../../include/linux/xarray.h \ + ../../../include/linux/maple_tree.h \ ../../../include/linux/radix-tree.h \ ../../../include/linux/idr.h @@ -51,6 +54,8 @@ idr.c: ../../../lib/idr.c xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c +maple.o: ../../../lib/maple_tree.c ../../../lib/test_maple_tree.c + generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \ diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h index 2218b3cc184e..e7da80350236 100644 --- a/tools/testing/radix-tree/generated/autoconf.h +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -1 +1,2 @@ #define CONFIG_XARRAY_MULTI 1 +#define CONFIG_64BIT 1 diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index d5c1bcba86fe..2048d12c31df 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -23,15 +23,47 @@ struct kmem_cache { int nr_objs; void *objs; void (*ctor)(void *); + unsigned int non_kernel; + unsigned long nr_allocated; + unsigned long nr_tallocated; }; +void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val) +{ + cachep->non_kernel = val; +} + +unsigned long kmem_cache_get_alloc(struct kmem_cache *cachep) +{ + return cachep->size * cachep->nr_allocated; +} + +unsigned long kmem_cache_nr_allocated(struct kmem_cache *cachep) +{ + return cachep->nr_allocated; +} + +unsigned long kmem_cache_nr_tallocated(struct kmem_cache *cachep) +{ + return cachep->nr_tallocated; +} + +void kmem_cache_zero_nr_tallocated(struct kmem_cache *cachep) +{ + cachep->nr_tallocated = 0; +} + void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, int gfp) { void *p; - if (!(gfp & __GFP_DIRECT_RECLAIM)) - return NULL; + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + return NULL; + + cachep->non_kernel--; + } pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs) { @@ -53,19 +85,21 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, memset(p, 0, cachep->size); } + uatomic_inc(&cachep->nr_allocated); uatomic_inc(&nr_allocated); + uatomic_inc(&cachep->nr_tallocated); if (kmalloc_verbose) printf("Allocating %p from slab\n", p); return p; } -void kmem_cache_free(struct kmem_cache *cachep, void *objp) +void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) { assert(objp); uatomic_dec(&nr_allocated); + uatomic_dec(&cachep->nr_allocated); if (kmalloc_verbose) printf("Freeing %p to slab\n", objp); - pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs > 10 || cachep->align) { memset(objp, POISON_FREE, cachep->size); free(objp); @@ -75,9 +109,80 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) node->parent = cachep->objs; cachep->objs = node; } +} + +void kmem_cache_free(struct kmem_cache *cachep, void *objp) +{ + pthread_mutex_lock(&cachep->lock); + kmem_cache_free_locked(cachep, objp); pthread_mutex_unlock(&cachep->lock); } +void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) +{ + if (kmalloc_verbose) + pr_debug("Bulk free %p[0-%lu]\n", list, size - 1); + + pthread_mutex_lock(&cachep->lock); + for (int i = 0; i < size; i++) + kmem_cache_free_locked(cachep, list[i]); + pthread_mutex_unlock(&cachep->lock); +} + +int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, + void **p) +{ + size_t i; + + if (kmalloc_verbose) + pr_debug("Bulk alloc %lu\n", size); + + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (cachep->non_kernel < size) + return 0; + + cachep->non_kernel -= size; + } + + pthread_mutex_lock(&cachep->lock); + if (cachep->nr_objs >= size) { + struct radix_tree_node *node; + + for (i = 0; i < size; i++) { + node = cachep->objs; + cachep->nr_objs--; + cachep->objs = node->parent; + p[i] = node; + node->parent = NULL; + } + pthread_mutex_unlock(&cachep->lock); + } else { + pthread_mutex_unlock(&cachep->lock); + for (i = 0; i < size; i++) { + if (cachep->align) { + posix_memalign(&p[i], cachep->align, + cachep->size * size); + } else { + p[i] = malloc(cachep->size * size); + } + if (cachep->ctor) + cachep->ctor(p[i]); + else if (gfp & __GFP_ZERO) + memset(p[i], 0, cachep->size); + } + } + + for (i = 0; i < size; i++) { + uatomic_inc(&nr_allocated); + uatomic_inc(&cachep->nr_allocated); + uatomic_inc(&cachep->nr_tallocated); + if (kmalloc_verbose) + printf("Allocating %p from slab\n", p[i]); + } + + return size; +} + struct kmem_cache * kmem_cache_create(const char *name, unsigned int size, unsigned int align, unsigned int flags, void (*ctor)(void *)) @@ -88,7 +193,54 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, ret->size = size; ret->align = align; ret->nr_objs = 0; + ret->nr_allocated = 0; + ret->nr_tallocated = 0; ret->objs = NULL; ret->ctor = ctor; + ret->non_kernel = 0; return ret; } + +/* + * Test the test infrastructure for kem_cache_alloc/free and bulk counterparts. + */ +void test_kmem_cache_bulk(void) +{ + int i; + void *list[12]; + static struct kmem_cache *test_cache, *test_cache2; + + /* + * Testing the bulk allocators without aligned kmem_cache to force the + * bulk alloc/free to reuse + */ + test_cache = kmem_cache_create("test_cache", 256, 0, SLAB_PANIC, NULL); + + for (i = 0; i < 5; i++) + list[i] = kmem_cache_alloc(test_cache, __GFP_DIRECT_RECLAIM); + + for (i = 0; i < 5; i++) + kmem_cache_free(test_cache, list[i]); + assert(test_cache->nr_objs == 5); + + kmem_cache_alloc_bulk(test_cache, __GFP_DIRECT_RECLAIM, 5, list); + kmem_cache_free_bulk(test_cache, 5, list); + + for (i = 0; i < 12 ; i++) + list[i] = kmem_cache_alloc(test_cache, __GFP_DIRECT_RECLAIM); + + for (i = 0; i < 12; i++) + kmem_cache_free(test_cache, list[i]); + + /* The last free will not be kept around */ + assert(test_cache->nr_objs == 11); + + /* Aligned caches will immediately free */ + test_cache2 = kmem_cache_create("test_cache2", 128, 128, SLAB_PANIC, NULL); + + kmem_cache_alloc_bulk(test_cache2, __GFP_DIRECT_RECLAIM, 10, list); + kmem_cache_free_bulk(test_cache2, 10, list); + assert(!test_cache2->nr_objs); + + +} diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 39867fd80c8f..c5c9d05f29da 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -14,6 +14,7 @@ #include "../../../include/linux/kconfig.h" #define printk printf +#define pr_err printk #define pr_info printk #define pr_debug printk #define pr_cont printk diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h index 016cff473cfc..62473ab57f99 100644 --- a/tools/testing/radix-tree/linux/lockdep.h +++ b/tools/testing/radix-tree/linux/lockdep.h @@ -11,4 +11,6 @@ static inline void lockdep_set_class(spinlock_t *lock, struct lock_class_key *key) { } + +extern int lockdep_is_held(const void *); #endif /* _LINUX_LOCKDEP_H */ diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/radix-tree/linux/maple_tree.h new file mode 100644 index 000000000000..7d8d1f445b89 --- /dev/null +++ b/tools/testing/radix-tree/linux/maple_tree.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#define atomic_t int32_t +#include "../../../../include/linux/maple_tree.h" +#define atomic_inc(x) uatomic_inc(x) +#define atomic_read(x) uatomic_read(x) +#define atomic_set(x, y) do {} while (0) +#define U8_MAX UCHAR_MAX diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c new file mode 100644 index 000000000000..35082671928a --- /dev/null +++ b/tools/testing/radix-tree/maple.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * maple_tree.c: Userspace shim for maple tree test-suite + * Copyright (c) 2018 Liam R. Howlett <Liam.Howlett@Oracle.com> + */ + +#define CONFIG_DEBUG_MAPLE_TREE +#define CONFIG_MAPLE_SEARCH +#include "test.h" + +#define module_init(x) +#define module_exit(x) +#define MODULE_AUTHOR(x) +#define MODULE_LICENSE(x) +#define dump_stack() assert(0) + +#include "../../../lib/maple_tree.c" +#undef CONFIG_DEBUG_MAPLE_TREE +#include "../../../lib/test_maple_tree.c" + +void farmer_tests(void) +{ + struct maple_node *node; + DEFINE_MTREE(tree); + + mt_dump(&tree); + + tree.ma_root = xa_mk_value(0); + mt_dump(&tree); + + node = mt_alloc_one(GFP_KERNEL); + node->parent = (void *)((unsigned long)(&tree) | 1); + node->slot[0] = xa_mk_value(0); + node->slot[1] = xa_mk_value(1); + node->mr64.pivot[0] = 0; + node->mr64.pivot[1] = 1; + node->mr64.pivot[2] = 0; + tree.ma_root = mt_mk_node(node, maple_leaf_64); + mt_dump(&tree); + + ma_free_rcu(node); +} + +void maple_tree_tests(void) +{ + farmer_tests(); + maple_tree_seed(); + maple_tree_harvest(); +} + +int __weak main(void) +{ + maple_tree_init(); + maple_tree_tests(); + rcu_barrier(); + if (nr_allocated) + printf("nr_allocated = %d\n", nr_allocated); + return 0; +} diff --git a/tools/testing/radix-tree/trace/events/maple_tree.h b/tools/testing/radix-tree/trace/events/maple_tree.h new file mode 100644 index 000000000000..97d0e1ddcf08 --- /dev/null +++ b/tools/testing/radix-tree/trace/events/maple_tree.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#define trace_ma_op(a, b) do {} while (0) +#define trace_ma_read(a, b) do {} while (0) +#define trace_ma_write(a, b, c, d) do {} while (0) diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config index 84fe884fad86..97d549ee894f 100644 --- a/tools/testing/selftests/cgroup/config +++ b/tools/testing/selftests/cgroup/config @@ -4,5 +4,4 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_MEMCG=y CONFIG_MEMCG_KMEM=y -CONFIG_MEMCG_SWAP=y CONFIG_PAGE_COUNTER=y diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 0470c5f3e690..a1fa2eff8192 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -6,6 +6,7 @@ TEST_GEN_FILES += huge_count_read_write TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh +TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += sysfs.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh new file mode 100644 index 000000000000..4a76e37ef16b --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test duplicated context creation +# ================================ + +if ! echo foo > "$DBGFS/mk_contexts" +then + echo "context creation failed" + exit 1 +fi + +if echo foo > "$DBGFS/mk_contexts" +then + echo "duplicate context creation success" + exit 1 +fi + +if ! echo foo > "$DBGFS/rm_contexts" +then + echo "context deletion failed" + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc new file mode 100644 index 000000000000..fc1daac7f066 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc @@ -0,0 +1,27 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Event probe event parser error log check +# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README error_log + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'event_probe' "$1" 'dynamic_events' +} + +check_error 'e ^a.' # NO_EVENT_INFO +check_error 'e ^.b' # NO_EVENT_INFO +check_error 'e ^a.b' # BAD_ATTACH_EVENT +check_error 'e syscalls/sys_enter_openat ^foo' # BAD_ATTACH_ARG +check_error 'e:^/bar syscalls/sys_enter_openat' # NO_GROUP_NAME +check_error 'e:^12345678901234567890123456789012345678901234567890123456789012345/bar syscalls/sys_enter_openat' # GROUP_TOO_LONG + +check_error 'e:^foo.1/bar syscalls/sys_enter_openat' # BAD_GROUP_NAME +check_error 'e:^ syscalls/sys_enter_openat' # NO_EVENT_NAME +check_error 'e:foo/^12345678901234567890123456789012345678901234567890123456789012345 syscalls/sys_enter_openat' # EVENT_TOO_LONG +check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME + +check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG +check_error 'e:foo/bar syscalls/sys_enter_openat ^arg=$foo' # BAD_ATTACH_ARG + +check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index 3145b0f1835c..8d26d5505808 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -85,7 +85,7 @@ run_enable_disable() { echo $check_disable > $EVENT_ENABLE done sleep $SLEEP_TIME - echo " make sure it's still works" + echo " make sure it still works" test_event_enabled $check_enable_star reset_ftrace_filter diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 45d9aee1c0d8..2f0d705db9db 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/aarch32_id_regs /aarch64/arch_timer /aarch64/debug-exceptions /aarch64/get-reg-list diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index fde3ae8cfa4c..0172eb6cb6ee 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -147,6 +147,7 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test +TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c new file mode 100644 index 000000000000..6f9c1f19c7f6 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * aarch32_id_regs - Test for ID register behavior on AArch64-only systems + * + * Copyright (c) 2022 Google LLC. + * + * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ + * and WI from userspace. + */ + +#include <stdint.h> + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +#define BAD_ID_REG_VAL 0x1badc0deul + +#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0) + +static void guest_main(void) +{ + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1); + GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1); + GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3)); + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1); + GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7)); + + GUEST_DONE(); +} + +static void test_guest_raz(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static uint64_t raz_wi_reg_ids[] = { + KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1), + KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1), + KVM_ARM64_SYS_REG(SYS_MVFR0_EL1), + KVM_ARM64_SYS_REG(SYS_MVFR1_EL1), + KVM_ARM64_SYS_REG(SYS_MVFR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1), +}; + +static void test_user_raz_wi(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) { + uint64_t reg_id = raz_wi_reg_ids[i]; + uint64_t val; + + vcpu_get_reg(vcpu, reg_id, &val); + ASSERT_EQ(val, 0); + + /* + * Expect the ioctl to succeed with no effect on the register + * value. + */ + vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); + + vcpu_get_reg(vcpu, reg_id, &val); + ASSERT_EQ(val, 0); + } +} + +static uint64_t raz_invariant_reg_ids[] = { + KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1), + KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)), + KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1), + KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)), +}; + +static void test_user_raz_invariant(struct kvm_vcpu *vcpu) +{ + int i, r; + + for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) { + uint64_t reg_id = raz_invariant_reg_ids[i]; + uint64_t val; + + vcpu_get_reg(vcpu, reg_id, &val); + ASSERT_EQ(val, 0); + + r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); + TEST_ASSERT(r < 0 && errno == EINVAL, + "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); + + vcpu_get_reg(vcpu, reg_id, &val); + ASSERT_EQ(val, 0); + } +} + + + +static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) +{ + uint64_t val, el0; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + + el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT; + return el0 == ID_AA64PFR0_ELx_64BIT_ONLY; +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + + TEST_REQUIRE(vcpu_aarch64_only(vcpu)); + + ucall_init(vm, NULL); + + test_user_raz_wi(vcpu); + test_user_raz_invariant(vcpu); + test_guest_raz(vcpu); + + ucall_uninit(vm); + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 2ee35cf9801e..947bd201435c 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -22,6 +22,7 @@ #define SPSR_SS (1 << 21) extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start; +extern unsigned char iter_ss_begin, iter_ss_end; static volatile uint64_t sw_bp_addr, hw_bp_addr; static volatile uint64_t wp_addr, wp_data_addr; static volatile uint64_t svc_addr; @@ -238,6 +239,46 @@ static void guest_svc_handler(struct ex_regs *regs) svc_addr = regs->pc; } +enum single_step_op { + SINGLE_STEP_ENABLE = 0, + SINGLE_STEP_DISABLE = 1, +}; + +static void guest_code_ss(int test_cnt) +{ + uint64_t i; + uint64_t bvr, wvr, w_bvr, w_wvr; + + for (i = 0; i < test_cnt; i++) { + /* Bits [1:0] of dbg{b,w}vr are RES0 */ + w_bvr = i << 2; + w_wvr = i << 2; + + /* Enable Single Step execution */ + GUEST_SYNC(SINGLE_STEP_ENABLE); + + /* + * The userspace will veriry that the pc is as expected during + * single step execution between iter_ss_begin and iter_ss_end. + */ + asm volatile("iter_ss_begin:nop\n"); + + write_sysreg(w_bvr, dbgbvr0_el1); + write_sysreg(w_wvr, dbgwvr0_el1); + bvr = read_sysreg(dbgbvr0_el1); + wvr = read_sysreg(dbgwvr0_el1); + + asm volatile("iter_ss_end:\n"); + + /* Disable Single Step execution */ + GUEST_SYNC(SINGLE_STEP_DISABLE); + + GUEST_ASSERT(bvr == w_bvr); + GUEST_ASSERT(wvr == w_wvr); + } + GUEST_DONE(); +} + static int debug_version(struct kvm_vcpu *vcpu) { uint64_t id_aa64dfr0; @@ -246,7 +287,7 @@ static int debug_version(struct kvm_vcpu *vcpu) return id_aa64dfr0 & 0xf; } -int main(int argc, char *argv[]) +static void test_guest_debug_exceptions(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -259,9 +300,6 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); - __TEST_REQUIRE(debug_version(vcpu) >= 6, - "Armv8 debug architecture not supported."); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_BRK_INS, guest_sw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, @@ -294,5 +332,108 @@ int main(int argc, char *argv[]) done: kvm_vm_free(vm); +} + +void test_single_step_from_userspace(int test_cnt) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + struct kvm_run *run; + uint64_t pc, cmd; + uint64_t test_pc = 0; + bool ss_enable = false; + struct kvm_guest_debug debug = {}; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); + ucall_init(vm, NULL); + run = vcpu->run; + vcpu_args_set(vcpu, 1, test_cnt); + + while (1) { + vcpu_run(vcpu); + if (run->exit_reason != KVM_EXIT_DEBUG) { + cmd = get_ucall(vcpu, &uc); + if (cmd == UCALL_ABORT) { + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + } else if (cmd == UCALL_DONE) { + break; + } + + TEST_ASSERT(cmd == UCALL_SYNC, + "Unexpected ucall cmd 0x%lx", cmd); + + if (uc.args[1] == SINGLE_STEP_ENABLE) { + debug.control = KVM_GUESTDBG_ENABLE | + KVM_GUESTDBG_SINGLESTEP; + ss_enable = true; + } else { + debug.control = SINGLE_STEP_DISABLE; + ss_enable = false; + } + + vcpu_guest_debug_set(vcpu, &debug); + continue; + } + + TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); + + /* Check if the current pc is expected. */ + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + TEST_ASSERT(!test_pc || pc == test_pc, + "Unexpected pc 0x%lx (expected 0x%lx)", + pc, test_pc); + + /* + * If the current pc is between iter_ss_bgin and + * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should + * be the current pc + 4. + */ + if ((pc >= (uint64_t)&iter_ss_begin) && + (pc < (uint64_t)&iter_ss_end)) + test_pc = pc + 4; + else + test_pc = 0; + } + + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("Usage: %s [-h] [-i iterations of the single step test]\n", name); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int opt; + int ss_iteration = 10000; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(debug_version(vcpu) >= 6, + "Armv8 debug architecture not supported."); + kvm_vm_free(vm); + + while ((opt = getopt(argc, argv, "i:")) != -1) { + switch (opt) { + case 'i': + ss_iteration = atoi(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + test_guest_debug_exceptions(); + test_single_step_from_userspace(ss_iteration); + return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index f7621f6e938e..e0b9e81a3e09 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -1,12 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the - * CPU_ON PSCI call matches what the caller requested. + * psci_test - Tests relating to KVM's PSCI implementation. * * Copyright (c) 2021 Google LLC. * - * This is a regression test for a race between KVM servicing the PSCI call and - * userspace reading the vCPUs registers. + * This test includes: + * - A regression test for a race between KVM servicing the PSCI CPU_ON call + * and userspace reading the targeted vCPU's registers. + * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated + * KVM_SYSTEM_EVENT_SUSPEND UAPI. */ #define _GNU_SOURCE diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 9c883c94d478..b5234d6efbe1 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -17,6 +17,7 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/atomic.h> +#include <asm/barrier.h> #include "kvm_util.h" #include "test_util.h" @@ -264,7 +265,8 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) static bool dirty_ring_supported(void) { - return kvm_has_cap(KVM_CAP_DIRTY_LOG_RING); + return (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) || + kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL)); } static void dirty_ring_create_vm_done(struct kvm_vm *vm) @@ -279,12 +281,12 @@ static void dirty_ring_create_vm_done(struct kvm_vm *vm) static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn) { - return gfn->flags == KVM_DIRTY_GFN_F_DIRTY; + return smp_load_acquire(&gfn->flags) == KVM_DIRTY_GFN_F_DIRTY; } static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) { - gfn->flags = KVM_DIRTY_GFN_F_RESET; + smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET); } static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 24fde97f6121..e42a09cd24a0 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -175,6 +175,10 @@ extern const struct vm_guest_mode_params vm_guest_mode_params[]; int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); + +bool get_kvm_intel_param_bool(const char *param); +bool get_kvm_amd_param_bool(const char *param); + unsigned int kvm_check_cap(long cap); static inline bool kvm_has_cap(long cap) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 5c5a88180b6c..befc754ce9b3 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -63,8 +63,10 @@ void test_assert(bool exp, const char *exp_str, #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ } while (0) -#define TEST_FAIL(fmt, ...) \ - TEST_ASSERT(false, fmt, ##__VA_ARGS__) +#define TEST_FAIL(fmt, ...) do { \ + TEST_ASSERT(false, fmt, ##__VA_ARGS__); \ + __builtin_unreachable(); \ +} while (0) size_t parse_size(const char *size); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0cbc71b7af50..e8ca0d8a6a7e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -825,6 +825,8 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); } +bool kvm_is_tdp_enabled(void); + uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t vaddr); void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, @@ -855,6 +857,8 @@ enum pg_level { #define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); +void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint64_t nr_bytes, int level); /* * Basic CPU control in CR0 diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9889fe0d8919..f1cb1627161f 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -50,6 +50,45 @@ int open_kvm_dev_path_or_exit(void) return _open_kvm_dev_path_or_exit(O_RDONLY); } +static bool get_module_param_bool(const char *module_name, const char *param) +{ + const int path_size = 128; + char path[path_size]; + char value; + ssize_t r; + int fd; + + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", + module_name, param); + TEST_ASSERT(r < path_size, + "Failed to construct sysfs path in %d bytes.", path_size); + + fd = open_path_or_exit(path, O_RDONLY); + + r = read(fd, &value, 1); + TEST_ASSERT(r == 1, "read(%s) failed", path); + + r = close(fd); + TEST_ASSERT(!r, "close(%s) failed", path); + + if (value == 'Y') + return true; + else if (value == 'N') + return false; + + TEST_FAIL("Unrecognized value '%c' for boolean module param", value); +} + +bool get_kvm_intel_param_bool(const char *param) +{ + return get_module_param_bool("kvm_intel", param); +} + +bool get_kvm_amd_param_bool(const char *param) +{ + return get_module_param_bool("kvm_amd", param); +} + /* * Capability * @@ -82,7 +121,10 @@ unsigned int kvm_check_cap(long cap) void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) { - vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); + if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) + vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); + else + vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); vm->dirty_ring_size = ring_size; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 2e6e61bbe81b..39c4409ef56a 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -111,6 +111,14 @@ static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) } } +bool kvm_is_tdp_enabled(void) +{ + if (is_intel_cpu()) + return get_kvm_intel_param_bool("ept"); + else + return get_kvm_amd_param_bool("npt"); +} + void virt_arch_pgd_alloc(struct kvm_vm *vm) { TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " @@ -214,6 +222,25 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); } +void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint64_t nr_bytes, int level) +{ + uint64_t pg_size = PG_LEVEL_SIZE(level); + uint64_t nr_pages = nr_bytes / pg_size; + int i; + + TEST_ASSERT(nr_bytes % pg_size == 0, + "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx", + nr_bytes, pg_size); + + for (i = 0; i < nr_pages; i++) { + __virt_pg_map(vm, vaddr, paddr, level); + + vaddr += pg_size; + paddr += pg_size; + } +} + static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t vaddr) @@ -1294,20 +1321,9 @@ done: /* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ bool vm_is_unrestricted_guest(struct kvm_vm *vm) { - char val = 'N'; - size_t count; - FILE *f; - /* Ensure that a KVM vendor-specific module is loaded. */ if (vm == NULL) close(open_kvm_dev_path_or_exit()); - f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r"); - if (f) { - count = fread(&val, sizeof(char), 1, f); - TEST_ASSERT(count == 1, "Unable to read from param file."); - fclose(f); - } - - return val == 'Y'; + return get_kvm_intel_param_bool("unrestricted_guest"); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 6d445886e16c..5495a92dfd5a 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -60,18 +60,6 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, seg->base = base; } -/* - * Avoid using memset to clear the vmcb, since libc may not be - * available in L1 (and, even if it is, features that libc memset may - * want to use, like AVX, may not be enabled). - */ -static void clear_vmcb(struct vmcb *vmcb) -{ - int n = sizeof(*vmcb) / sizeof(u32); - - asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory"); -} - void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) { struct vmcb *vmcb = svm->vmcb; @@ -88,7 +76,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r wrmsr(MSR_EFER, efer | EFER_SVME); wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); - clear_vmcb(vmcb); + memset(vmcb, 0, sizeof(*vmcb)); asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c index e0004bd26536..32f7e09ef67c 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c @@ -17,84 +17,70 @@ /* VMCALL and VMMCALL are both 3-byte opcodes. */ #define HYPERCALL_INSN_SIZE 3 -static bool ud_expected; +static bool quirk_disabled; static void guest_ud_handler(struct ex_regs *regs) { - GUEST_ASSERT(ud_expected); - GUEST_DONE(); + regs->rax = -EFAULT; + regs->rip += HYPERCALL_INSN_SIZE; } -extern uint8_t svm_hypercall_insn[HYPERCALL_INSN_SIZE]; -static uint64_t svm_do_sched_yield(uint8_t apic_id) -{ - uint64_t ret; +static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; +static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; - asm volatile("mov %1, %%rax\n\t" - "mov %2, %%rbx\n\t" - "svm_hypercall_insn:\n\t" - "vmmcall\n\t" - "mov %%rax, %0\n\t" - : "=r"(ret) - : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id) - : "rax", "rbx", "memory"); - - return ret; -} - -extern uint8_t vmx_hypercall_insn[HYPERCALL_INSN_SIZE]; -static uint64_t vmx_do_sched_yield(uint8_t apic_id) +extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE]; +static uint64_t do_sched_yield(uint8_t apic_id) { uint64_t ret; - asm volatile("mov %1, %%rax\n\t" - "mov %2, %%rbx\n\t" - "vmx_hypercall_insn:\n\t" - "vmcall\n\t" - "mov %%rax, %0\n\t" - : "=r"(ret) - : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id) - : "rax", "rbx", "memory"); + asm volatile("hypercall_insn:\n\t" + ".byte 0xcc,0xcc,0xcc\n\t" + : "=a"(ret) + : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id) + : "memory"); return ret; } static void guest_main(void) { - uint8_t *native_hypercall_insn, *hypercall_insn; - uint8_t apic_id; - - apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); + const uint8_t *native_hypercall_insn; + const uint8_t *other_hypercall_insn; + uint64_t ret; if (is_intel_cpu()) { - native_hypercall_insn = vmx_hypercall_insn; - hypercall_insn = svm_hypercall_insn; - svm_do_sched_yield(apic_id); + native_hypercall_insn = vmx_vmcall; + other_hypercall_insn = svm_vmmcall; } else if (is_amd_cpu()) { - native_hypercall_insn = svm_hypercall_insn; - hypercall_insn = vmx_hypercall_insn; - vmx_do_sched_yield(apic_id); + native_hypercall_insn = svm_vmmcall; + other_hypercall_insn = vmx_vmcall; } else { GUEST_ASSERT(0); /* unreachable */ return; } + memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE); + + ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID))); + /* - * The hypercall didn't #UD (guest_ud_handler() signals "done" if a #UD - * occurs). Verify that a #UD is NOT expected and that KVM patched in - * the native hypercall. + * If the quirk is disabled, verify that guest_ud_handler() "returned" + * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is + * enabled, verify that the hypercall succeeded and that KVM patched in + * the "right" hypercall. */ - GUEST_ASSERT(!ud_expected); - GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, HYPERCALL_INSN_SIZE)); - GUEST_DONE(); -} + if (quirk_disabled) { + GUEST_ASSERT(ret == (uint64_t)-EFAULT); + GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn, + HYPERCALL_INSN_SIZE)); + } else { + GUEST_ASSERT(!ret); + GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, + HYPERCALL_INSN_SIZE)); + } -static void setup_ud_vector(struct kvm_vcpu *vcpu) -{ - vm_init_descriptor_tables(vcpu->vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); + GUEST_DONE(); } static void enter_guest(struct kvm_vcpu *vcpu) @@ -117,35 +103,23 @@ static void enter_guest(struct kvm_vcpu *vcpu) } } -static void test_fix_hypercall(void) +static void test_fix_hypercall(bool disable_quirk) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; vm = vm_create_with_one_vcpu(&vcpu, guest_main); - setup_ud_vector(vcpu); - - ud_expected = false; - sync_global_to_guest(vm, ud_expected); - - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - - enter_guest(vcpu); -} -static void test_fix_hypercall_disabled(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - vm = vm_create_with_one_vcpu(&vcpu, guest_main); - setup_ud_vector(vcpu); + vm_init_descriptor_tables(vcpu->vm); + vcpu_init_descriptor_tables(vcpu); + vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, - KVM_X86_QUIRK_FIX_HYPERCALL_INSN); + if (disable_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, + KVM_X86_QUIRK_FIX_HYPERCALL_INSN); - ud_expected = true; - sync_global_to_guest(vm, ud_expected); + quirk_disabled = disable_quirk; + sync_global_to_guest(vm, quirk_disabled); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); @@ -156,6 +130,6 @@ int main(void) { TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN); - test_fix_hypercall(); - test_fix_hypercall_disabled(); + test_fix_hypercall(false); + test_fix_hypercall(true); } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 79ab0152d281..05b32e550a80 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -26,7 +26,8 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, : "=a" (*hv_status), "+c" (control), "+d" (input_address), KVM_ASM_SAFE_OUTPUTS(vector) - : [output_address] "r"(output_address) + : [output_address] "r"(output_address), + "a" (-EFAULT) : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); return vector; } @@ -81,13 +82,13 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) } vector = hypercall(hcall->control, input, output, &res); - if (hcall->ud_expected) + if (hcall->ud_expected) { GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); - else + } else { GUEST_ASSERT_2(!vector, hcall->control, vector); + GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res); + } - GUEST_ASSERT_2(!hcall->ud_expected || res == hcall->expect, - hcall->expect, res); GUEST_DONE(); } @@ -507,7 +508,7 @@ static void guest_test_hcalls_access(void) switch (stage) { case 0: feat->eax |= HV_MSR_HYPERCALL_AVAILABLE; - hcall->control = 0xdeadbeef; + hcall->control = 0xbeef; hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; break; diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index e19933ea34ca..59ffe7fd354f 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -112,6 +112,7 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages, { struct kvm_vcpu *vcpu; struct kvm_vm *vm; + uint64_t nr_bytes; void *hva; int r; @@ -134,10 +135,24 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages, HPAGE_GPA, HPAGE_SLOT, HPAGE_SLOT_NPAGES, 0); - virt_map(vm, HPAGE_GVA, HPAGE_GPA, HPAGE_SLOT_NPAGES); + nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size; + + /* + * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the + * region into the guest with 2MiB pages whenever TDP is disabled (i.e. + * whenever KVM is shadowing the guest page tables). + * + * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge + * pages irrespective of the guest page size, so map with 4KiB pages + * to test that that is the case. + */ + if (kvm_is_tdp_enabled()) + virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K); + else + virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M); hva = addr_gpa2hva(vm, HPAGE_GPA); - memset(hva, RETURN_OPCODE, HPAGE_SLOT_NPAGES * PAGE_SIZE); + memset(hva, RETURN_OPCODE, nr_bytes); check_2m_page_count(vm, 0); check_split_count(vm, 0); diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index 46a97f318f58..74ee5067a8ce 100755 --- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh @@ -134,6 +134,16 @@ offline_memory_expect_fail() return 0 } +online_all_offline_memory() +{ + for memory in `hotpluggable_offline_memory`; do + if ! online_memory_expect_success $memory; then + echo "$FUNCNAME $memory: unexpected fail" >&2 + retval=1 + fi + done +} + error=-12 priority=0 # Run with default of ratio=2 for Kselftest run @@ -197,8 +207,11 @@ echo -e "\t trying to offline $target out of $hotpluggable_num memory block(s):" for memory in `hotpluggable_online_memory`; do if [ "$target" -gt 0 ]; then echo "online->offline memory$memory" - if offline_memory_expect_success $memory; then + if offline_memory_expect_success $memory &>/dev/null; then target=$(($target - 1)) + echo "-> Success" + else + echo "-> Failure" fi fi done @@ -257,7 +270,7 @@ prerequisite_extra echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error for memory in `hotpluggable_online_memory`; do if [ $((RANDOM % 100)) -lt $ratio ]; then - offline_memory_expect_success $memory + offline_memory_expect_success $memory &>/dev/null fi done @@ -266,16 +279,16 @@ done # echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error for memory in `hotpluggable_offline_memory`; do - online_memory_expect_fail $memory + if ! online_memory_expect_fail $memory; then + retval=1 + fi done # # Online all hot-pluggable memory # echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error -for memory in `hotpluggable_offline_memory`; do - online_memory_expect_success $memory -done +online_all_offline_memory # # Test memory hot-remove error handling (online => offline) @@ -283,11 +296,18 @@ done echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error for memory in `hotpluggable_online_memory`; do if [ $((RANDOM % 100)) -lt $ratio ]; then - offline_memory_expect_fail $memory + if ! offline_memory_expect_fail $memory; then + retval=1 + fi fi done echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error /sbin/modprobe -q -r memory-notifier-error-inject +# +# Restore memory before exit +# +online_all_offline_memory + exit $retval diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d5a0dd548989..ee5e98204d3d 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1223,6 +1223,11 @@ ipv4_fcnal() log_test $rc 0 "Delete nexthop route warning" run_cmd "$IP route delete 172.16.101.1/32 nhid 12" run_cmd "$IP nexthop del id 12" + + run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.101.0/24 nhid 21" + run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" + log_test $? 2 "Delete multipath route with only nh id based entry" } ipv4_grp_fcnal() diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 600e3a19d5e2..4504ee07be08 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,7 +6,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh CFLAGS += $(shell pkg-config --cflags libmnl 2>/dev/null || echo "-I/usr/include/libmnl") LDLIBS = -lmnl diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh index fd76b69635a4..dff476e45e77 100755 --- a/tools/testing/selftests/netfilter/nft_fib.sh +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -188,6 +188,7 @@ test_ping() { ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null sleep 3 diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh new file mode 100755 index 000000000000..2d8da7bd8ab7 --- /dev/null +++ b/tools/testing/selftests/netfilter/rpath.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# return code to signal skipped test +ksft_skip=4 + +# search for legacy iptables (it uses the xtables extensions +if iptables-legacy --version >/dev/null 2>&1; then + iptables='iptables-legacy' +elif iptables --version >/dev/null 2>&1; then + iptables='iptables' +else + iptables='' +fi + +if ip6tables-legacy --version >/dev/null 2>&1; then + ip6tables='ip6tables-legacy' +elif ! ip6tables --version >/dev/null 2>&1; then + ip6tables='ip6tables' +else + ip6tables='' +fi + +if nft --version >/dev/null 2>&1; then + nft='nft' +else + nft='' +fi + +if [ -z "$iptables$ip6tables$nft" ]; then + echo "SKIP: Test needs iptables, ip6tables or nft" + exit $ksft_skip +fi + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +trap "ip netns del $ns1; ip netns del $ns2" EXIT + +# create two netns, disable rp_filter in ns2 and +# keep IPv6 address when moving into VRF +ip netns add "$ns1" +ip netns add "$ns2" +ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0 +ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 + +# a standard connection between the netns, should not trigger rp filter +ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2" +ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up +ip -net "$ns1" a a 192.168.23.2/24 dev v0 +ip -net "$ns2" a a 192.168.23.1/24 dev v0 +ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad + +# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0 +ip -net "$ns2" link add d0 type dummy +ip -net "$ns2" link set d0 up +ip -net "$ns1" a a 192.168.42.2/24 dev v0 +ip -net "$ns2" a a 192.168.42.1/24 dev d0 +ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad + +# firewall matches to test +ip netns exec "$ns2" "$iptables" -t raw -A PREROUTING -s 192.168.0.0/16 -m rpfilter +ip netns exec "$ns2" "$ip6tables" -t raw -A PREROUTING -s fec0::/16 -m rpfilter +ip netns exec "$ns2" nft -f - <<EOF +table inet t { + chain c { + type filter hook prerouting priority raw; + ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter + ip6 saddr fec0::/16 fib saddr . iif oif exists counter + } +} +EOF + +die() { + echo "FAIL: $*" + #ip netns exec "$ns2" "$iptables" -t raw -vS + #ip netns exec "$ns2" "$ip6tables" -t raw -vS + #ip netns exec "$ns2" nft list ruleset + exit 1 +} + +# check rule counters, return true if rule did not match +ipt_zero_rule() { # (command) + [ -n "$1" ] || return 0 + ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0" +} +nft_zero_rule() { # (family) + [ -n "$nft" ] || return 0 + ip netns exec "$ns2" "$nft" list chain inet t c | \ + grep -q "$1 saddr .* counter packets 0 bytes 0" +} + +netns_ping() { # (netns, args...) + local netns="$1" + shift + ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null +} + +testrun() { + # clear counters first + [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z + [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z + if [ -n "$nft" ]; then + ( + echo "delete table inet t"; + ip netns exec "$ns2" nft -s list table inet t; + ) | ip netns exec "$ns2" nft -f - + fi + + # test 1: martian traffic should fail rpfilter matches + netns_ping "$ns1" -I v0 192.168.42.1 && \ + die "martian ping 192.168.42.1 succeeded" + netns_ping "$ns1" -I v0 fec0:42::1 && \ + die "martian ping fec0:42::1 succeeded" + + ipt_zero_rule "$iptables" || die "iptables matched martian" + ipt_zero_rule "$ip6tables" || die "ip6tables matched martian" + nft_zero_rule ip || die "nft IPv4 matched martian" + nft_zero_rule ip6 || die "nft IPv6 matched martian" + + # test 2: rpfilter match should pass for regular traffic + netns_ping "$ns1" 192.168.23.1 || \ + die "regular ping 192.168.23.1 failed" + netns_ping "$ns1" fec0:23::1 || \ + die "regular ping fec0:23::1 failed" + + ipt_zero_rule "$iptables" && die "iptables match not effective" + ipt_zero_rule "$ip6tables" && die "ip6tables match not effective" + nft_zero_rule ip && die "nft IPv4 match not effective" + nft_zero_rule ip6 && die "nft IPv6 match not effective" + +} + +testrun + +# repeat test with vrf device in $ns2 +ip -net "$ns2" link add vrf0 type vrf table 10 +ip -net "$ns2" link set vrf0 up +ip -net "$ns2" link set v0 master vrf0 + +testrun + +echo "PASS: netfilter reverse path match works as intended" +exit 0 diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index c4e6a34f9657..a156ac5dd2c6 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -5,6 +5,7 @@ /proc-fsconfig-hidepid /proc-loadavg-001 /proc-multiple-procfs +/proc-empty-vm /proc-pid-vm /proc-self-map-files-001 /proc-self-map-files-002 diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 219fc6113847..cd95369254c0 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -8,6 +8,7 @@ TEST_GEN_PROGS += fd-001-lookup TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c new file mode 100644 index 000000000000..d95b1cb43d9d --- /dev/null +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Create a process without mappings by unmapping everything at once and + * holding it with ptrace(2). See what happens to + * + * /proc/${pid}/maps + * /proc/${pid}/numa_maps + * /proc/${pid}/smaps + * /proc/${pid}/smaps_rollup + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +/* + * 0: vsyscall VMA doesn't exist vsyscall=none + * 1: vsyscall VMA is --xp vsyscall=xonly + * 2: vsyscall VMA is r-xp vsyscall=emulate + */ +static int g_vsyscall; +static const char *g_proc_pid_maps_vsyscall; +static const char *g_proc_pid_smaps_vsyscall; + +static const char proc_pid_maps_vsyscall_0[] = ""; +static const char proc_pid_maps_vsyscall_1[] = +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; +static const char proc_pid_maps_vsyscall_2[] = +"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; + +static const char proc_pid_smaps_vsyscall_0[] = ""; + +static const char proc_pid_smaps_vsyscall_1[] = +"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" +"Size: 4 kB\n" +"KernelPageSize: 4 kB\n" +"MMUPageSize: 4 kB\n" +"Rss: 0 kB\n" +"Pss: 0 kB\n" +"Pss_Dirty: 0 kB\n" +"Shared_Clean: 0 kB\n" +"Shared_Dirty: 0 kB\n" +"Private_Clean: 0 kB\n" +"Private_Dirty: 0 kB\n" +"Referenced: 0 kB\n" +"Anonymous: 0 kB\n" +"LazyFree: 0 kB\n" +"AnonHugePages: 0 kB\n" +"ShmemPmdMapped: 0 kB\n" +"FilePmdMapped: 0 kB\n" +"Shared_Hugetlb: 0 kB\n" +"Private_Hugetlb: 0 kB\n" +"Swap: 0 kB\n" +"SwapPss: 0 kB\n" +"Locked: 0 kB\n" +"THPeligible: 0\n" +/* + * "ProtectionKey:" field is conditional. It is possible to check it as well, + * but I don't have such machine. + */ +; + +static const char proc_pid_smaps_vsyscall_2[] = +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" +"Size: 4 kB\n" +"KernelPageSize: 4 kB\n" +"MMUPageSize: 4 kB\n" +"Rss: 0 kB\n" +"Pss: 0 kB\n" +"Pss_Dirty: 0 kB\n" +"Shared_Clean: 0 kB\n" +"Shared_Dirty: 0 kB\n" +"Private_Clean: 0 kB\n" +"Private_Dirty: 0 kB\n" +"Referenced: 0 kB\n" +"Anonymous: 0 kB\n" +"LazyFree: 0 kB\n" +"AnonHugePages: 0 kB\n" +"ShmemPmdMapped: 0 kB\n" +"FilePmdMapped: 0 kB\n" +"Shared_Hugetlb: 0 kB\n" +"Private_Hugetlb: 0 kB\n" +"Swap: 0 kB\n" +"SwapPss: 0 kB\n" +"Locked: 0 kB\n" +"THPeligible: 0\n" +/* + * "ProtectionKey:" field is conditional. It is possible to check it as well, + * but I'm too tired. + */ +; + +static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) +{ + _exit(EXIT_FAILURE); +} + +static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) +{ + _exit(g_vsyscall); +} + +/* + * vsyscall page can't be unmapped, probe it directly. + */ +static void vsyscall(void) +{ + pid_t pid; + int wstatus; + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "fork, errno %d\n", errno); + exit(1); + } + if (pid == 0) { + setrlimit(RLIMIT_CORE, &(struct rlimit){}); + + /* Hide "segfault at ffffffffff600000" messages. */ + struct sigaction act = {}; + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigaction_SIGSEGV_vsyscall; + sigaction(SIGSEGV, &act, NULL); + + g_vsyscall = 0; + /* gettimeofday(NULL, NULL); */ + asm volatile ( + "call %P0" + : + : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) + : "rax", "rcx", "r11" + ); + + g_vsyscall = 1; + *(volatile int *)0xffffffffff600000UL; + + g_vsyscall = 2; + exit(g_vsyscall); + } + waitpid(pid, &wstatus, 0); + if (WIFEXITED(wstatus)) { + g_vsyscall = WEXITSTATUS(wstatus); + } else { + fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus); + exit(1); + } +} + +static int test_proc_pid_maps(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + perror("open /proc/${pid}/maps"); + return EXIT_FAILURE; + } else { + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + if (g_vsyscall == 0) { + assert(rv == 0); + } else { + size_t len = strlen(g_proc_pid_maps_vsyscall); + assert(rv == len); + assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); + } + return EXIT_SUCCESS; + } +} + +static int test_proc_pid_numa_maps(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) { + /* + * /proc/${pid}/numa_maps is under CONFIG_NUMA, + * it doesn't necessarily exist. + */ + return EXIT_SUCCESS; + } + perror("open /proc/${pid}/numa_maps"); + return EXIT_FAILURE; + } else { + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + assert(rv == 0); + return EXIT_SUCCESS; + } +} + +static int test_proc_pid_smaps(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) { + /* + * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR, + * it doesn't necessarily exist. + */ + return EXIT_SUCCESS; + } + perror("open /proc/${pid}/smaps"); + return EXIT_FAILURE; + } else { + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + if (g_vsyscall == 0) { + assert(rv == 0); + } else { + size_t len = strlen(g_proc_pid_maps_vsyscall); + /* TODO "ProtectionKey:" */ + assert(rv > len); + assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); + } + return EXIT_SUCCESS; + } +} + +static const char g_smaps_rollup[] = +"00000000-00000000 ---p 00000000 00:00 0 [rollup]\n" +"Rss: 0 kB\n" +"Pss: 0 kB\n" +"Pss_Dirty: 0 kB\n" +"Pss_Anon: 0 kB\n" +"Pss_File: 0 kB\n" +"Pss_Shmem: 0 kB\n" +"Shared_Clean: 0 kB\n" +"Shared_Dirty: 0 kB\n" +"Private_Clean: 0 kB\n" +"Private_Dirty: 0 kB\n" +"Referenced: 0 kB\n" +"Anonymous: 0 kB\n" +"LazyFree: 0 kB\n" +"AnonHugePages: 0 kB\n" +"ShmemPmdMapped: 0 kB\n" +"FilePmdMapped: 0 kB\n" +"Shared_Hugetlb: 0 kB\n" +"Private_Hugetlb: 0 kB\n" +"Swap: 0 kB\n" +"SwapPss: 0 kB\n" +"Locked: 0 kB\n" +; + +static int test_proc_pid_smaps_rollup(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) { + /* + * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR, + * it doesn't necessarily exist. + */ + return EXIT_SUCCESS; + } + perror("open /proc/${pid}/smaps_rollup"); + return EXIT_FAILURE; + } else { + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + assert(rv == sizeof(g_smaps_rollup) - 1); + assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0); + return EXIT_SUCCESS; + } +} + +int main(void) +{ + int rv = EXIT_SUCCESS; + + vsyscall(); + + switch (g_vsyscall) { + case 0: + g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0; + g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0; + break; + case 1: + g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1; + g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1; + break; + case 2: + g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2; + g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2; + break; + default: + abort(); + } + + pid_t pid = fork(); + if (pid == -1) { + perror("fork"); + return EXIT_FAILURE; + } else if (pid == 0) { + rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL); + if (rv != 0) { + if (errno == EPERM) { + fprintf(stderr, +"Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n" + ); + kill(getppid(), SIGTERM); + return EXIT_FAILURE; + } + perror("ptrace PTRACE_TRACEME"); + return EXIT_FAILURE; + } + + /* + * Hide "segfault at ..." messages. Signal handler won't run. + */ + struct sigaction act = {}; + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigaction_SIGSEGV; + sigaction(SIGSEGV, &act, NULL); + +#ifdef __amd64__ + munmap(NULL, ((size_t)1 << 47) - 4096); +#else +#error "implement 'unmap everything'" +#endif + return EXIT_FAILURE; + } else { + /* + * TODO find reliable way to signal parent that munmap(2) completed. + * Child can't do it directly because it effectively doesn't exist + * anymore. Looking at child's VM files isn't 100% reliable either: + * due to a bug they may not become empty or empty-like. + */ + sleep(1); + + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_maps(pid); + } + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_numa_maps(pid); + } + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_smaps(pid); + } + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_smaps_rollup(pid); + } + /* + * TODO test /proc/${pid}/statm, task_statm() + * ->start_code, ->end_code aren't updated by munmap(). + * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything. + */ + + /* Cut the rope. */ + int wstatus; + waitpid(pid, &wstatus, 0); + assert(WIFSTOPPED(wstatus)); + assert(WSTOPSIG(wstatus) == SIGSEGV); + } + + return rv; +} diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index e5962f4794f5..69551bfa215c 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -213,22 +213,22 @@ static int make_exe(const uint8_t *payload, size_t len) /* * 0: vsyscall VMA doesn't exist vsyscall=none - * 1: vsyscall VMA is r-xp vsyscall=emulate - * 2: vsyscall VMA is --xp vsyscall=xonly + * 1: vsyscall VMA is --xp vsyscall=xonly + * 2: vsyscall VMA is r-xp vsyscall=emulate */ -static int g_vsyscall; +static volatile int g_vsyscall; static const char *str_vsyscall; static const char str_vsyscall_0[] = ""; static const char str_vsyscall_1[] = -"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; -static const char str_vsyscall_2[] = "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; +static const char str_vsyscall_2[] = +"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; #ifdef __x86_64__ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) { - _exit(1); + _exit(g_vsyscall); } /* @@ -255,6 +255,7 @@ static void vsyscall(void) act.sa_sigaction = sigaction_SIGSEGV; (void)sigaction(SIGSEGV, &act, NULL); + g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ asm volatile ( "call %P0" @@ -262,45 +263,20 @@ static void vsyscall(void) : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) : "rax", "rcx", "r11" ); - exit(0); - } - waitpid(pid, &wstatus, 0); - if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { - /* vsyscall page exists and is executable. */ - } else { - /* vsyscall page doesn't exist. */ - g_vsyscall = 0; - return; - } - - pid = fork(); - if (pid < 0) { - fprintf(stderr, "fork, errno %d\n", errno); - exit(1); - } - if (pid == 0) { - struct rlimit rlim = {0, 0}; - (void)setrlimit(RLIMIT_CORE, &rlim); - - /* Hide "segfault at ffffffffff600000" messages. */ - struct sigaction act; - memset(&act, 0, sizeof(struct sigaction)); - act.sa_flags = SA_SIGINFO; - act.sa_sigaction = sigaction_SIGSEGV; - (void)sigaction(SIGSEGV, &act, NULL); + g_vsyscall = 1; *(volatile int *)0xffffffffff600000UL; - exit(0); + + g_vsyscall = 2; + exit(g_vsyscall); } waitpid(pid, &wstatus, 0); - if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { - /* vsyscall page is readable and executable. */ - g_vsyscall = 1; - return; + if (WIFEXITED(wstatus)) { + g_vsyscall = WEXITSTATUS(wstatus); + } else { + fprintf(stderr, "error: wstatus %08x\n", wstatus); + exit(1); } - - /* vsyscall page is executable but unreadable. */ - g_vsyscall = 2; } int main(void) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 057a4f49c79d..c7363c6764fc 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -371,6 +371,10 @@ class Client: fcntl.fcntl(self.tpm, fcntl.F_SETFL, flags) self.tpm_poll = select.poll() + def __del__(self): + if self.tpm: + self.tpm.close() + def close(self): self.tpm.close() diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index a80fb5ef61d5..404a2713dcae 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -22,6 +22,11 @@ const char *enable_file = "/sys/kernel/debug/tracing/events/user_events/__test_e const char *trace_file = "/sys/kernel/debug/tracing/trace"; const char *fmt_file = "/sys/kernel/debug/tracing/events/user_events/__test_event/format"; +static inline int status_check(char *status_page, int status_bit) +{ + return status_page[status_bit >> 3] & (1 << (status_bit & 7)); +} + static int trace_bytes(void) { int fd = open(trace_file, O_RDONLY); @@ -197,12 +202,12 @@ TEST_F(user, register_events) { /* Register should work */ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); - ASSERT_NE(0, reg.status_index); + ASSERT_NE(0, reg.status_bit); /* Multiple registers should result in same index */ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); - ASSERT_NE(0, reg.status_index); + ASSERT_NE(0, reg.status_bit); /* Ensure disabled */ self->enable_fd = open(enable_file, O_RDWR); @@ -212,15 +217,15 @@ TEST_F(user, register_events) { /* MMAP should work and be zero'd */ ASSERT_NE(MAP_FAILED, status_page); ASSERT_NE(NULL, status_page); - ASSERT_EQ(0, status_page[reg.status_index]); + ASSERT_EQ(0, status_check(status_page, reg.status_bit)); /* Enable event and ensure bits updated in status */ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1"))) - ASSERT_EQ(EVENT_STATUS_FTRACE, status_page[reg.status_index]); + ASSERT_NE(0, status_check(status_page, reg.status_bit)); /* Disable event and ensure bits updated in status */ ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0"))) - ASSERT_EQ(0, status_page[reg.status_index]); + ASSERT_EQ(0, status_check(status_page, reg.status_bit)); /* File still open should return -EBUSY for delete */ ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event")); @@ -240,6 +245,8 @@ TEST_F(user, write_events) { struct iovec io[3]; __u32 field1, field2; int before = 0, after = 0; + int page_size = sysconf(_SC_PAGESIZE); + char *status_page; reg.size = sizeof(reg); reg.name_args = (__u64)"__test_event u32 field1; u32 field2"; @@ -254,10 +261,18 @@ TEST_F(user, write_events) { io[2].iov_base = &field2; io[2].iov_len = sizeof(field2); + status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED, + self->status_fd, 0); + /* Register should work */ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); - ASSERT_NE(0, reg.status_index); + ASSERT_NE(0, reg.status_bit); + + /* MMAP should work and be zero'd */ + ASSERT_NE(MAP_FAILED, status_page); + ASSERT_NE(NULL, status_page); + ASSERT_EQ(0, status_check(status_page, reg.status_bit)); /* Write should fail on invalid slot with ENOENT */ io[0].iov_base = &field2; @@ -271,6 +286,9 @@ TEST_F(user, write_events) { self->enable_fd = open(enable_file, O_RDWR); ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1"))) + /* Event should now be enabled */ + ASSERT_NE(0, status_check(status_page, reg.status_bit)); + /* Write should make it out to ftrace buffers */ before = trace_bytes(); ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 3)); @@ -298,7 +316,7 @@ TEST_F(user, write_fault) { /* Register should work */ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); - ASSERT_NE(0, reg.status_index); + ASSERT_NE(0, reg.status_bit); /* Write should work normally */ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2)); @@ -315,6 +333,11 @@ TEST_F(user, write_validator) { int loc, bytes; char data[8]; int before = 0, after = 0; + int page_size = sysconf(_SC_PAGESIZE); + char *status_page; + + status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED, + self->status_fd, 0); reg.size = sizeof(reg); reg.name_args = (__u64)"__test_event __rel_loc char[] data"; @@ -322,7 +345,12 @@ TEST_F(user, write_validator) { /* Register should work */ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); - ASSERT_NE(0, reg.status_index); + ASSERT_NE(0, reg.status_bit); + + /* MMAP should work and be zero'd */ + ASSERT_NE(MAP_FAILED, status_page); + ASSERT_NE(NULL, status_page); + ASSERT_EQ(0, status_check(status_page, reg.status_bit)); io[0].iov_base = ®.write_index; io[0].iov_len = sizeof(reg.write_index); @@ -340,6 +368,9 @@ TEST_F(user, write_validator) { self->enable_fd = open(enable_file, O_RDWR); ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1"))) + /* Event should now be enabled */ + ASSERT_NE(0, status_check(status_page, reg.status_bit)); + /* Full in-bounds write should work */ before = trace_bytes(); loc = DYN_LOC(0, bytes); diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index 26851d51d6bb..8b4c7879d5a7 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -35,6 +35,11 @@ static long perf_event_open(struct perf_event_attr *pe, pid_t pid, return syscall(__NR_perf_event_open, pe, pid, cpu, group_fd, flags); } +static inline int status_check(char *status_page, int status_bit) +{ + return status_page[status_bit >> 3] & (1 << (status_bit & 7)); +} + static int get_id(void) { FILE *fp = fopen(id_file, "r"); @@ -120,8 +125,8 @@ TEST_F(user, perf_write) { /* Register should work */ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); - ASSERT_NE(0, reg.status_index); - ASSERT_EQ(0, status_page[reg.status_index]); + ASSERT_NE(0, reg.status_bit); + ASSERT_EQ(0, status_check(status_page, reg.status_bit)); /* Id should be there */ id = get_id(); @@ -144,7 +149,7 @@ TEST_F(user, perf_write) { ASSERT_NE(MAP_FAILED, perf_page); /* Status should be updated */ - ASSERT_EQ(EVENT_STATUS_PERF, status_page[reg.status_index]); + ASSERT_NE(0, status_check(status_page, reg.status_bit)); event.index = reg.write_index; event.field1 = 0xc001; diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 31e5eea2a9b9..7b9dc2426f18 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -30,7 +30,6 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret -local_config.* soft-dirty split_huge_page_test ksm_tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d516b8c38eed..163c2fde3cb3 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests -LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h - -include local_config.mk +LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') @@ -97,9 +95,11 @@ TEST_FILES += va_128TBswitch.sh include ../lib.mk +$(OUTPUT)/khugepaged: vm_util.c $(OUTPUT)/madv_populate: vm_util.c $(OUTPUT)/soft-dirty: vm_util.c $(OUTPUT)/split_huge_page_test: vm_util.c +$(OUTPUT)/userfaultfd: vm_util.c ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) @@ -152,23 +152,6 @@ endif $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap -# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. -$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) - $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma - -local_config.mk local_config.h: check_config.sh - /bin/sh ./check_config.sh $(CC) - -EXTRA_CLEAN += local_config.mk local_config.h - -ifeq ($(HMM_EXTRA_LIBS),) -all: warn_missing_hugelibs - -warn_missing_hugelibs: - @echo ; \ - echo "Warning: missing libhugetlbfs support. Some HMM tests will be skipped." ; \ - echo -endif diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh deleted file mode 100644 index 079c8a40b85d..000000000000 --- a/tools/testing/selftests/vm/check_config.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# Probe for libraries and create header files to record the results. Both C -# header files and Makefile include fragments are created. - -OUTPUT_H_FILE=local_config.h -OUTPUT_MKFILE=local_config.mk - -# libhugetlbfs -tmpname=$(mktemp) -tmpfile_c=${tmpname}.c -tmpfile_o=${tmpname}.o - -echo "#include <sys/types.h>" > $tmpfile_c -echo "#include <hugetlbfs.h>" >> $tmpfile_c -echo "int func(void) { return 0; }" >> $tmpfile_c - -CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"} -$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1 - -if [ -f $tmpfile_o ]; then - echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE - echo "HMM_EXTRA_LIBS = -lhugetlbfs" > $OUTPUT_MKFILE -else - echo "// No libhugetlbfs support found" > $OUTPUT_H_FILE - echo "# No libhugetlbfs support found, so:" > $OUTPUT_MKFILE - echo "HMM_EXTRA_LIBS = " >> $OUTPUT_MKFILE -fi - -rm ${tmpname}.* diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 98b949c279be..4adaad1b822f 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -26,10 +26,6 @@ #include <sys/mman.h> #include <sys/ioctl.h> -#include "./local_config.h" -#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS -#include <hugetlbfs.h> -#endif /* * This is a private UAPI to the kernel test module so it isn't exported @@ -733,7 +729,54 @@ TEST_F(hmm, anon_write_huge) hmm_buffer_free(buffer); } -#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS +/* + * Read numeric data from raw and tagged kernel status files. Used to read + * /proc and /sys data (without a tag) and from /proc/meminfo (with a tag). + */ +static long file_read_ulong(char *file, const char *tag) +{ + int fd; + char buf[2048]; + int len; + char *p, *q; + long val; + + fd = open(file, O_RDONLY); + if (fd < 0) { + /* Error opening the file */ + return -1; + } + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) { + /* Error in reading the file */ + return -1; + } + if (len == sizeof(buf)) { + /* Error file is too large */ + return -1; + } + buf[len] = '\0'; + + /* Search for a tag if provided */ + if (tag) { + p = strstr(buf, tag); + if (!p) + return -1; /* looks like the line we want isn't there */ + p += strlen(tag); + } else + p = buf; + + val = strtol(p, &q, 0); + if (*q != ' ') { + /* Error parsing the file */ + return -1; + } + + return val; +} + /* * Write huge TLBFS page. */ @@ -742,29 +785,27 @@ TEST_F(hmm, anon_write_hugetlbfs) struct hmm_buffer *buffer; unsigned long npages; unsigned long size; + unsigned long default_hsize; unsigned long i; int *ptr; int ret; - long pagesizes[4]; - int n, idx; - /* Skip test if we can't allocate a hugetlbfs page. */ - - n = gethugepagesizes(pagesizes, 4); - if (n <= 0) + default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:"); + if (default_hsize < 0 || default_hsize*1024 < default_hsize) SKIP(return, "Huge page size could not be determined"); - for (idx = 0; --n > 0; ) { - if (pagesizes[n] < pagesizes[idx]) - idx = n; - } - size = ALIGN(TWOMEG, pagesizes[idx]); + default_hsize = default_hsize*1024; /* KB to B */ + + size = ALIGN(TWOMEG, default_hsize); npages = size >> self->page_shift; buffer = malloc(sizeof(*buffer)); ASSERT_NE(buffer, NULL); - buffer->ptr = get_hugepage_region(size, GHR_STRICT); - if (buffer->ptr == NULL) { + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (buffer->ptr == MAP_FAILED) { free(buffer); SKIP(return, "Huge page could not be allocated"); } @@ -788,11 +829,10 @@ TEST_F(hmm, anon_write_hugetlbfs) for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ASSERT_EQ(ptr[i], i); - free_hugepage_region(buffer->ptr); + munmap(buffer->ptr, buffer->size); buffer->ptr = NULL; hmm_buffer_free(buffer); } -#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */ /* * Read mmap'ed file memory. @@ -1014,6 +1054,55 @@ TEST_F(hmm, migrate_fault) hmm_buffer_free(buffer); } +TEST_F(hmm, migrate_release) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Release device memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages); + ASSERT_EQ(ret, 0); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + /* * Migrate anonymous shared memory to device private memory. */ @@ -1467,7 +1556,6 @@ TEST_F(hmm2, snapshot) hmm_buffer_free(buffer); } -#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS /* * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that * should be mapped by a large page table entry. @@ -1477,30 +1565,30 @@ TEST_F(hmm, compound) struct hmm_buffer *buffer; unsigned long npages; unsigned long size; + unsigned long default_hsize; int *ptr; unsigned char *m; int ret; - long pagesizes[4]; - int n, idx; unsigned long i; /* Skip test if we can't allocate a hugetlbfs page. */ - n = gethugepagesizes(pagesizes, 4); - if (n <= 0) - return; - for (idx = 0; --n > 0; ) { - if (pagesizes[n] < pagesizes[idx]) - idx = n; - } - size = ALIGN(TWOMEG, pagesizes[idx]); + default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:"); + if (default_hsize < 0 || default_hsize*1024 < default_hsize) + SKIP(return, "Huge page size could not be determined"); + default_hsize = default_hsize*1024; /* KB to B */ + + size = ALIGN(TWOMEG, default_hsize); npages = size >> self->page_shift; buffer = malloc(sizeof(*buffer)); ASSERT_NE(buffer, NULL); - buffer->ptr = get_hugepage_region(size, GHR_STRICT); - if (buffer->ptr == NULL) { + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (buffer->ptr == MAP_FAILED) { free(buffer); return; } @@ -1539,11 +1627,10 @@ TEST_F(hmm, compound) ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ | HMM_DMIRROR_PROT_PMD); - free_hugepage_region(buffer->ptr); + munmap(buffer->ptr, buffer->size); buffer->ptr = NULL; hmm_buffer_free(buffer); } -#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */ /* * Test two devices reading the same memory (double mapped). diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 155120b67a16..64126c8cd561 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -1,6 +1,9 @@ #define _GNU_SOURCE +#include <ctype.h> +#include <errno.h> #include <fcntl.h> #include <limits.h> +#include <dirent.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> @@ -10,10 +13,24 @@ #include <sys/mman.h> #include <sys/wait.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/vfs.h> + +#include "linux/magic.h" + +#include "vm_util.h" #ifndef MADV_PAGEOUT #define MADV_PAGEOUT 21 #endif +#ifndef MADV_POPULATE_READ +#define MADV_POPULATE_READ 22 +#endif +#ifndef MADV_COLLAPSE +#define MADV_COLLAPSE 25 +#endif #define BASE_ADDR ((void *)(1UL << 30)) static unsigned long hpage_pmd_size; @@ -22,6 +39,47 @@ static int hpage_pmd_nr; #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" +#define TEST_FILE "collapse_test_file" + +#define MAX_LINE_LENGTH 500 + +enum vma_type { + VMA_ANON, + VMA_FILE, + VMA_SHMEM, +}; + +struct mem_ops { + void *(*setup_area)(int nr_hpages); + void (*cleanup_area)(void *p, unsigned long size); + void (*fault)(void *p, unsigned long start, unsigned long end); + bool (*check_huge)(void *addr, int nr_hpages); + const char *name; +}; + +static struct mem_ops *file_ops; +static struct mem_ops *anon_ops; +static struct mem_ops *shmem_ops; + +struct collapse_context { + void (*collapse)(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops, bool expect); + bool enforce_pte_scan_limits; + const char *name; +}; + +static struct collapse_context *khugepaged_context; +static struct collapse_context *madvise_context; + +struct file_info { + const char *dir; + char path[PATH_MAX]; + enum vma_type type; + int fd; + char dev_queue_read_ahead_path[PATH_MAX]; +}; + +static struct file_info finfo; enum thp_enabled { THP_ALWAYS, @@ -88,18 +146,7 @@ struct settings { enum shmem_enabled shmem_enabled; bool use_zero_page; struct khugepaged_settings khugepaged; -}; - -static struct settings default_settings = { - .thp_enabled = THP_MADVISE, - .thp_defrag = THP_DEFRAG_ALWAYS, - .shmem_enabled = SHMEM_NEVER, - .use_zero_page = 0, - .khugepaged = { - .defrag = 1, - .alloc_sleep_millisecs = 10, - .scan_sleep_millisecs = 10, - }, + unsigned long read_ahead_kb; }; static struct settings saved_settings; @@ -118,6 +165,11 @@ static void fail(const char *msg) exit_status++; } +static void skip(const char *msg) +{ + printf(" \e[33m%s\e[0m\n", msg); +} + static int read_file(const char *path, char *buf, size_t buflen) { int fd; @@ -145,13 +197,19 @@ static int write_file(const char *path, const char *buf, size_t buflen) ssize_t numwritten; fd = open(path, O_WRONLY); - if (fd == -1) + if (fd == -1) { + printf("open(%s)\n", path); + exit(EXIT_FAILURE); return 0; + } numwritten = write(fd, buf, buflen - 1); close(fd); - if (numwritten < 1) + if (numwritten < 1) { + printf("write(%s)\n", buf); + exit(EXIT_FAILURE); return 0; + } return (unsigned int) numwritten; } @@ -218,20 +276,11 @@ static void write_string(const char *name, const char *val) } } -static const unsigned long read_num(const char *name) +static const unsigned long _read_num(const char *path) { - char path[PATH_MAX]; char buf[21]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - ret = read_file(path, buf, sizeof(buf)); - if (ret < 0) { + if (read_file(path, buf, sizeof(buf)) < 0) { perror("read_file(read_num)"); exit(EXIT_FAILURE); } @@ -239,10 +288,9 @@ static const unsigned long read_num(const char *name) return strtoul(buf, NULL, 10); } -static void write_num(const char *name, unsigned long num) +static const unsigned long read_num(const char *name) { char path[PATH_MAX]; - char buf[21]; int ret; ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); @@ -250,6 +298,12 @@ static void write_num(const char *name, unsigned long num) printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); } + return _read_num(path); +} + +static void _write_num(const char *path, unsigned long num) +{ + char buf[21]; sprintf(buf, "%ld", num); if (!write_file(path, buf, strlen(buf) + 1)) { @@ -258,6 +312,19 @@ static void write_num(const char *name, unsigned long num) } } +static void write_num(const char *name, unsigned long num) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + _write_num(path, num); +} + static void write_settings(struct settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; @@ -277,6 +344,43 @@ static void write_settings(struct settings *settings) write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); + + if (file_ops && finfo.type == VMA_FILE) + _write_num(finfo.dev_queue_read_ahead_path, + settings->read_ahead_kb); +} + +#define MAX_SETTINGS_DEPTH 4 +static struct settings settings_stack[MAX_SETTINGS_DEPTH]; +static int settings_index; + +static struct settings *current_settings(void) +{ + if (!settings_index) { + printf("Fail: No settings set"); + exit(EXIT_FAILURE); + } + return settings_stack + settings_index - 1; +} + +static void push_settings(struct settings *settings) +{ + if (settings_index >= MAX_SETTINGS_DEPTH) { + printf("Fail: Settings stack exceeded"); + exit(EXIT_FAILURE); + } + settings_stack[settings_index++] = *settings; + write_settings(current_settings()); +} + +static void pop_settings(void) +{ + if (settings_index <= 0) { + printf("Fail: Settings stack empty"); + exit(EXIT_FAILURE); + } + --settings_index; + write_settings(current_settings()); } static void restore_settings(int sig) @@ -314,6 +418,10 @@ static void save_settings(void) .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), .pages_to_scan = read_num("khugepaged/pages_to_scan"), }; + if (file_ops && finfo.type == VMA_FILE) + saved_settings.read_ahead_kb = + _read_num(finfo.dev_queue_read_ahead_path); + success("OK"); signal(SIGTERM, restore_settings); @@ -322,72 +430,90 @@ static void save_settings(void) signal(SIGQUIT, restore_settings); } -static void adjust_settings(void) +static void get_finfo(const char *dir) { + struct stat path_stat; + struct statfs fs; + char buf[1 << 10]; + char path[PATH_MAX]; + char *str, *end; - printf("Adjust settings..."); - write_settings(&default_settings); - success("OK"); -} - -#define MAX_LINE_LENGTH 500 - -static bool check_for_pattern(FILE *fp, char *pattern, char *buf) -{ - while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { - if (!strncmp(buf, pattern, strlen(pattern))) - return true; + finfo.dir = dir; + stat(finfo.dir, &path_stat); + if (!S_ISDIR(path_stat.st_mode)) { + printf("%s: Not a directory (%s)\n", __func__, finfo.dir); + exit(EXIT_FAILURE); } - return false; -} - -static bool check_huge(void *addr) -{ - bool thp = false; - int ret; - FILE *fp; - char buffer[MAX_LINE_LENGTH]; - char addr_pattern[MAX_LINE_LENGTH]; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", - (unsigned long) addr); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); + if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE, + finfo.dir) >= sizeof(finfo.path)) { + printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); } - - - fp = fopen(PID_SMAPS, "r"); - if (!fp) { - printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); + if (statfs(finfo.dir, &fs)) { + perror("statfs()"); exit(EXIT_FAILURE); } - if (!check_for_pattern(fp, addr_pattern, buffer)) - goto err_out; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB", - hpage_pmd_size >> 10); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); + finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE; + if (finfo.type == VMA_SHMEM) + return; + + /* Find owning device's queue/read_ahead_kb control */ + if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent", + major(path_stat.st_dev), minor(path_stat.st_dev)) + >= sizeof(path)) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + if (read_file(path, buf, sizeof(buf)) < 0) { + perror("read_file(read_num)"); + exit(EXIT_FAILURE); + } + if (strstr(buf, "DEVTYPE=disk")) { + /* Found it */ + if (snprintf(finfo.dev_queue_read_ahead_path, + sizeof(finfo.dev_queue_read_ahead_path), + "/sys/dev/block/%d:%d/queue/read_ahead_kb", + major(path_stat.st_dev), minor(path_stat.st_dev)) + >= sizeof(finfo.dev_queue_read_ahead_path)) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return; + } + if (!strstr(buf, "DEVTYPE=partition")) { + printf("%s: Unknown device type: %s\n", __func__, path); exit(EXIT_FAILURE); } /* - * Fetch the AnonHugePages: in the same block and check whether it got - * the expected number of hugeepages next. + * Partition of block device - need to find actual device. + * Using naming convention that devnameN is partition of + * device devname. */ - if (!check_for_pattern(fp, "AnonHugePages:", buffer)) - goto err_out; - - if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) - goto err_out; - - thp = true; -err_out: - fclose(fp); - return thp; + str = strstr(buf, "DEVNAME="); + if (!str) { + printf("%s: Could not read: %s", __func__, path); + exit(EXIT_FAILURE); + } + str += 8; + end = str; + while (*end) { + if (isdigit(*end)) { + *end = '\0'; + if (snprintf(finfo.dev_queue_read_ahead_path, + sizeof(finfo.dev_queue_read_ahead_path), + "/sys/block/%s/queue/read_ahead_kb", + str) >= sizeof(finfo.dev_queue_read_ahead_path)) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return; + } + ++end; + } + printf("%s: Could not read: %s\n", __func__, path); + exit(EXIT_FAILURE); } - static bool check_swap(void *addr, unsigned long size) { bool swap = false; @@ -409,7 +535,7 @@ static bool check_swap(void *addr, unsigned long size) printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); exit(EXIT_FAILURE); } - if (!check_for_pattern(fp, addr_pattern, buffer)) + if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) goto err_out; ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB", @@ -422,7 +548,7 @@ static bool check_swap(void *addr, unsigned long size) * Fetch the Swap: in the same block and check whether it got * the expected number of hugeepages next. */ - if (!check_for_pattern(fp, "Swap:", buffer)) + if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer))) goto err_out; if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) @@ -434,12 +560,12 @@ err_out: return swap; } -static void *alloc_mapping(void) +static void *alloc_mapping(int nr) { void *p; - p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (p != BASE_ADDR) { printf("Failed to allocate VMA at %p\n", BASE_ADDR); exit(EXIT_FAILURE); @@ -456,6 +582,60 @@ static void fill_memory(int *p, unsigned long start, unsigned long end) p[i * page_size / sizeof(*p)] = i + 0xdead0000; } +/* + * MADV_COLLAPSE is a best-effort request and may fail if an internal + * resource is temporarily unavailable, in which case it will set errno to + * EAGAIN. In such a case, immediately reattempt the operation one more + * time. + */ +static int madvise_collapse_retry(void *p, unsigned long size) +{ + bool retry = true; + int ret; + +retry: + ret = madvise(p, size, MADV_COLLAPSE); + if (ret && errno == EAGAIN && retry) { + retry = false; + goto retry; + } + return ret; +} + +/* + * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with + * validate_memory()'able contents. + */ +static void *alloc_hpage(struct mem_ops *ops) +{ + void *p = ops->setup_area(1); + + ops->fault(p, 0, hpage_pmd_size); + + /* + * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE. + * The latter is ineligible for collapse by MADV_COLLAPSE + * while the former might cause MADV_COLLAPSE to race with + * khugepaged on low-load system (like a test machine), which + * would cause MADV_COLLAPSE to fail with EAGAIN. + */ + printf("Allocate huge page..."); + if (madvise_collapse_retry(p, hpage_pmd_size)) { + perror("madvise(MADV_COLLAPSE)"); + exit(EXIT_FAILURE); + } + if (!ops->check_huge(p, 1)) { + perror("madvise(MADV_COLLAPSE)"); + exit(EXIT_FAILURE); + } + if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) { + perror("madvise(MADV_HUGEPAGE)"); + exit(EXIT_FAILURE); + } + success("OK"); + return p; +} + static void validate_memory(int *p, unsigned long start, unsigned long end) { int i; @@ -469,26 +649,216 @@ static void validate_memory(int *p, unsigned long start, unsigned long end) } } +static void *anon_setup_area(int nr_hpages) +{ + return alloc_mapping(nr_hpages); +} + +static void anon_cleanup_area(void *p, unsigned long size) +{ + munmap(p, size); +} + +static void anon_fault(void *p, unsigned long start, unsigned long end) +{ + fill_memory(p, start, end); +} + +static bool anon_check_huge(void *addr, int nr_hpages) +{ + return check_huge_anon(addr, nr_hpages, hpage_pmd_size); +} + +static void *file_setup_area(int nr_hpages) +{ + int fd; + void *p; + unsigned long size; + + unlink(finfo.path); /* Cleanup from previous failed tests */ + printf("Creating %s for collapse%s...", finfo.path, + finfo.type == VMA_SHMEM ? " (tmpfs)" : ""); + fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL, + 777); + if (fd < 0) { + perror("open()"); + exit(EXIT_FAILURE); + } + + size = nr_hpages * hpage_pmd_size; + p = alloc_mapping(nr_hpages); + fill_memory(p, 0, size); + write(fd, p, size); + close(fd); + munmap(p, size); + success("OK"); + + printf("Opening %s read only for collapse...", finfo.path); + finfo.fd = open(finfo.path, O_RDONLY, 777); + if (finfo.fd < 0) { + perror("open()"); + exit(EXIT_FAILURE); + } + p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC, + MAP_PRIVATE, finfo.fd, 0); + if (p == MAP_FAILED || p != BASE_ADDR) { + perror("mmap()"); + exit(EXIT_FAILURE); + } + + /* Drop page cache */ + write_file("/proc/sys/vm/drop_caches", "3", 2); + success("OK"); + return p; +} + +static void file_cleanup_area(void *p, unsigned long size) +{ + munmap(p, size); + close(finfo.fd); + unlink(finfo.path); +} + +static void file_fault(void *p, unsigned long start, unsigned long end) +{ + if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) { + perror("madvise(MADV_POPULATE_READ"); + exit(EXIT_FAILURE); + } +} + +static bool file_check_huge(void *addr, int nr_hpages) +{ + switch (finfo.type) { + case VMA_FILE: + return check_huge_file(addr, nr_hpages, hpage_pmd_size); + case VMA_SHMEM: + return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); + default: + exit(EXIT_FAILURE); + return false; + } +} + +static void *shmem_setup_area(int nr_hpages) +{ + void *p; + unsigned long size = nr_hpages * hpage_pmd_size; + + finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0); + if (finfo.fd < 0) { + perror("memfd_create()"); + exit(EXIT_FAILURE); + } + if (ftruncate(finfo.fd, size)) { + perror("ftruncate()"); + exit(EXIT_FAILURE); + } + p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd, + 0); + if (p != BASE_ADDR) { + perror("mmap()"); + exit(EXIT_FAILURE); + } + return p; +} + +static void shmem_cleanup_area(void *p, unsigned long size) +{ + munmap(p, size); + close(finfo.fd); +} + +static bool shmem_check_huge(void *addr, int nr_hpages) +{ + return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); +} + +static struct mem_ops __anon_ops = { + .setup_area = &anon_setup_area, + .cleanup_area = &anon_cleanup_area, + .fault = &anon_fault, + .check_huge = &anon_check_huge, + .name = "anon", +}; + +static struct mem_ops __file_ops = { + .setup_area = &file_setup_area, + .cleanup_area = &file_cleanup_area, + .fault = &file_fault, + .check_huge = &file_check_huge, + .name = "file", +}; + +static struct mem_ops __shmem_ops = { + .setup_area = &shmem_setup_area, + .cleanup_area = &shmem_cleanup_area, + .fault = &anon_fault, + .check_huge = &shmem_check_huge, + .name = "shmem", +}; + +static void __madvise_collapse(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops, bool expect) +{ + int ret; + struct settings settings = *current_settings(); + + printf("%s...", msg); + + /* + * Prevent khugepaged interference and tests that MADV_COLLAPSE + * ignores /sys/kernel/mm/transparent_hugepage/enabled + */ + settings.thp_enabled = THP_NEVER; + settings.shmem_enabled = SHMEM_NEVER; + push_settings(&settings); + + /* Clear VM_NOHUGEPAGE */ + madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); + ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size); + if (((bool)ret) == expect) + fail("Fail: Bad return value"); + else if (!ops->check_huge(p, expect ? nr_hpages : 0)) + fail("Fail: check_huge()"); + else + success("OK"); + + pop_settings(); +} + +static void madvise_collapse(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops, bool expect) +{ + /* Sanity check */ + if (!ops->check_huge(p, 0)) { + printf("Unexpected huge page\n"); + exit(EXIT_FAILURE); + } + __madvise_collapse(msg, p, nr_hpages, ops, expect); +} + #define TICK 500000 -static bool wait_for_scan(const char *msg, char *p) +static bool wait_for_scan(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops) { int full_scans; int timeout = 6; /* 3 seconds */ /* Sanity check */ - if (check_huge(p)) { + if (!ops->check_huge(p, 0)) { printf("Unexpected huge page\n"); exit(EXIT_FAILURE); } - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); /* Wait until the second full_scan completed */ full_scans = read_num("khugepaged/full_scans") + 2; printf("%s...", msg); while (timeout--) { - if (check_huge(p)) + if (ops->check_huge(p, nr_hpages)) break; if (read_num("khugepaged/full_scans") >= full_scans) break; @@ -496,122 +866,155 @@ static bool wait_for_scan(const char *msg, char *p) usleep(TICK); } - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); return timeout == -1; } +static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops, bool expect) +{ + if (wait_for_scan(msg, p, nr_hpages, ops)) { + if (expect) + fail("Timeout"); + else + success("OK"); + return; + } + + /* + * For file and shmem memory, khugepaged only retracts pte entries after + * putting the new hugepage in the page cache. The hugepage must be + * subsequently refaulted to install the pmd mapping for the mm. + */ + if (ops != &__anon_ops) + ops->fault(p, 0, nr_hpages * hpage_pmd_size); + + if (ops->check_huge(p, expect ? nr_hpages : 0)) + success("OK"); + else + fail("Fail"); +} + +static struct collapse_context __khugepaged_context = { + .collapse = &khugepaged_collapse, + .enforce_pte_scan_limits = true, + .name = "khugepaged", +}; + +static struct collapse_context __madvise_context = { + .collapse = &madvise_collapse, + .enforce_pte_scan_limits = false, + .name = "madvise", +}; + +static bool is_tmpfs(struct mem_ops *ops) +{ + return ops == &__file_ops && finfo.type == VMA_SHMEM; +} + static void alloc_at_fault(void) { - struct settings settings = default_settings; + struct settings settings = *current_settings(); char *p; settings.thp_enabled = THP_ALWAYS; - write_settings(&settings); + push_settings(&settings); - p = alloc_mapping(); + p = alloc_mapping(1); *p = 1; printf("Allocate huge page on fault..."); - if (check_huge(p)) + if (check_huge_anon(p, 1, hpage_pmd_size)) success("OK"); else fail("Fail"); - write_settings(&default_settings); + pop_settings(); madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); - if (!check_huge(p)) + if (check_huge_anon(p, 0, hpage_pmd_size)) success("OK"); else fail("Fail"); munmap(p, hpage_pmd_size); } -static void collapse_full(void) +static void collapse_full(struct collapse_context *c, struct mem_ops *ops) { void *p; - - p = alloc_mapping(); - fill_memory(p, 0, hpage_pmd_size); - if (wait_for_scan("Collapse fully populated PTE table", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + int nr_hpages = 4; + unsigned long size = nr_hpages * hpage_pmd_size; + + p = ops->setup_area(nr_hpages); + ops->fault(p, 0, size); + c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages, + ops, true); + validate_memory(p, 0, size); + ops->cleanup_area(p, size); } -static void collapse_empty(void) +static void collapse_empty(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(); - if (wait_for_scan("Do not collapse empty PTE table", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); - munmap(p, hpage_pmd_size); + p = ops->setup_area(1); + c->collapse("Do not collapse empty PTE table", p, 1, ops, false); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_single_pte_entry(void) +static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(); - fill_memory(p, 0, page_size); - if (wait_for_scan("Collapse PTE table with single PTE entry present", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); + p = ops->setup_area(1); + ops->fault(p, 0, page_size); + c->collapse("Collapse PTE table with single PTE entry present", p, + 1, ops, true); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_max_ptes_none(void) +static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = default_settings; + struct settings settings = *current_settings(); void *p; settings.khugepaged.max_ptes_none = max_ptes_none; - write_settings(&settings); + push_settings(&settings); - p = alloc_mapping(); + p = ops->setup_area(1); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + if (is_tmpfs(ops)) { + /* shmem pages always in the page cache */ + printf("tmpfs..."); + skip("Skip"); + goto skip; + } - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, + ops, !c->enforce_pte_scan_limits); + validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - munmap(p, hpage_pmd_size); - write_settings(&default_settings); + if (c->enforce_pte_scan_limits) { + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); + c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops, + true); + validate_memory(p, 0, + (hpage_pmd_nr - max_ptes_none) * page_size); + } +skip: + ops->cleanup_area(p, hpage_pmd_size); + pop_settings(); } -static void collapse_swapin_single_pte(void) +static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(); - fill_memory(p, 0, hpage_pmd_size); + + p = ops->setup_area(1); + ops->fault(p, 0, hpage_pmd_size); printf("Swapout one page..."); if (madvise(p, page_size, MADV_PAGEOUT)) { @@ -625,25 +1028,21 @@ static void collapse_swapin_single_pte(void) goto out; } - if (wait_for_scan("Collapse with swapping in single PTE entry", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse with swapping in single PTE entry", p, 1, ops, + true); validate_memory(p, 0, hpage_pmd_size); out: - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_max_ptes_swap(void) +static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); void *p; - p = alloc_mapping(); + p = ops->setup_area(1); + ops->fault(p, 0, hpage_pmd_size); - fill_memory(p, 0, hpage_pmd_size); printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) { perror("madvise(MADV_PAGEOUT)"); @@ -656,115 +1055,93 @@ static void collapse_max_ptes_swap(void) goto out; } - if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); + c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops, + !c->enforce_pte_scan_limits); validate_memory(p, 0, hpage_pmd_size); - fill_memory(p, 0, hpage_pmd_size); - printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr); - if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, max_ptes_swap * page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } + if (c->enforce_pte_scan_limits) { + ops->fault(p, 0, hpage_pmd_size); + printf("Swapout %d of %d pages...", max_ptes_swap, + hpage_pmd_nr); + if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { + perror("madvise(MADV_PAGEOUT)"); + exit(EXIT_FAILURE); + } + if (check_swap(p, max_ptes_swap * page_size)) { + success("OK"); + } else { + fail("Fail"); + goto out; + } - if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); + c->collapse("Collapse with max_ptes_swap pages swapped out", p, + 1, ops, true); + validate_memory(p, 0, hpage_pmd_size); + } out: - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_single_pte_entry_compound(void) +static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(); + p = alloc_hpage(ops); - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); + if (is_tmpfs(ops)) { + /* MADV_DONTNEED won't evict tmpfs pages */ + printf("tmpfs..."); + skip("Skip"); + goto skip; + } + madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); printf("Split huge page leaving single PTE mapping compound page..."); madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); - if (!check_huge(p)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table with single PTE mapping compound page", + p, 1, ops, true); validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); +skip: + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_full_of_compound(void) +static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - + p = alloc_hpage(ops); printf("Split huge page leaving single PTE page table full of compound pages..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - if (wait_for_scan("Collapse PTE table full of compound pages", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table full of compound pages", p, 1, ops, + true); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_compound_extreme(void) +static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops) { void *p; int i; - p = alloc_mapping(); + p = ops->setup_area(1); for (i = 0; i < hpage_pmd_nr; i++) { printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", i + 1, hpage_pmd_nr); madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(BASE_ADDR, 0, hpage_pmd_size); - if (!check_huge(BASE_ADDR)) { + ops->fault(BASE_ADDR, 0, hpage_pmd_size); + if (!ops->check_huge(BASE_ADDR, 1)) { printf("Failed to allocate huge page\n"); exit(EXIT_FAILURE); } @@ -791,34 +1168,30 @@ static void collapse_compound_extreme(void) } } - munmap(BASE_ADDR, hpage_pmd_size); - fill_memory(p, 0, hpage_pmd_size); - if (!check_huge(p)) + ops->cleanup_area(BASE_ADDR, hpage_pmd_size); + ops->fault(p, 0, hpage_pmd_size); + if (!ops->check_huge(p, 1)) success("OK"); else fail("Fail"); - if (wait_for_scan("Collapse PTE table full of different compound pages", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table full of different compound pages", p, 1, + ops, true); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_fork(void) +static void collapse_fork(struct collapse_context *c, struct mem_ops *ops) { int wstatus; void *p; - p = alloc_mapping(); + p = ops->setup_area(1); printf("Allocate small page..."); - fill_memory(p, 0, page_size); - if (!check_huge(p)) + ops->fault(p, 0, page_size); + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -829,22 +1202,17 @@ static void collapse_fork(void) skip_settings_restore = true; exit_status = 0; - if (!check_huge(p)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - fill_memory(p, page_size, 2 * page_size); - - if (wait_for_scan("Collapse PTE table with single page shared with parent process", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + ops->fault(p, page_size, 2 * page_size); + c->collapse("Collapse PTE table with single page shared with parent process", + p, 1, ops, true); validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); exit(exit_status); } @@ -852,36 +1220,27 @@ static void collapse_fork(void) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has small page..."); - if (!check_huge(p)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_fork_compound(void) +static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops) { int wstatus; void *p; - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - + p = alloc_hpage(ops); printf("Share huge page over fork()..."); if (!fork()) { /* Do not touch settings on child exit */ skip_settings_restore = true; exit_status = 0; - if (check_huge(p)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -889,24 +1248,20 @@ static void collapse_fork_compound(void) printf("Split huge page PMD in child process..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - fill_memory(p, 0, page_size); + ops->fault(p, 0, page_size); write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); - if (wait_for_scan("Collapse PTE table full of compound pages in child", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table full of compound pages in child", + p, 1, ops, true); write_num("khugepaged/max_ptes_shared", - default_settings.khugepaged.max_ptes_shared); + current_settings()->khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); exit(exit_status); } @@ -914,74 +1269,59 @@ static void collapse_fork_compound(void) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_max_ptes_shared() +static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); int wstatus; void *p; - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - + p = alloc_hpage(ops); printf("Share huge page over fork()..."); if (!fork()) { /* Do not touch settings on child exit */ skip_settings_restore = true; exit_status = 0; - if (check_huge(p)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); printf("Trigger CoW on page %d of %d...", hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p)) - fail("Timeout"); - else if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Trigger CoW on page %d of %d...", - hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); - if (!check_huge(p)) + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - - if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Maybe collapse with max_ptes_shared exceeded", p, + 1, ops, !c->enforce_pte_scan_limits); + + if (c->enforce_pte_scan_limits) { + printf("Trigger CoW on page %d of %d...", + hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) * + page_size); + if (ops->check_huge(p, 0)) + success("OK"); + else + fail("Fail"); + + c->collapse("Collapse with max_ptes_shared PTEs shared", + p, 1, ops, true); + } validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); exit(exit_status); } @@ -989,20 +1329,153 @@ static void collapse_max_ptes_shared() exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -int main(void) +static void madvise_collapse_existing_thps(struct collapse_context *c, + struct mem_ops *ops) { + void *p; + + p = ops->setup_area(1); + ops->fault(p, 0, hpage_pmd_size); + c->collapse("Collapse fully populated PTE table...", p, 1, ops, true); + validate_memory(p, 0, hpage_pmd_size); + + /* c->collapse() will find a hugepage and complain - call directly. */ + __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true); + validate_memory(p, 0, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); +} + +/* + * Test race with khugepaged where page tables have been retracted and + * pmd cleared. + */ +static void madvise_retracted_page_tables(struct collapse_context *c, + struct mem_ops *ops) +{ + void *p; + int nr_hpages = 1; + unsigned long size = nr_hpages * hpage_pmd_size; + + p = ops->setup_area(nr_hpages); + ops->fault(p, 0, size); + + /* Let khugepaged collapse and leave pmd cleared */ + if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages, + ops)) { + fail("Timeout"); + return; + } + success("OK"); + c->collapse("Install huge PMD from page cache", p, nr_hpages, ops, + true); + validate_memory(p, 0, size); + ops->cleanup_area(p, size); +} + +static void usage(void) +{ + fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n"); + fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n"); + fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n"); + fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n"); + fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n"); + fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); + fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); + fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); + fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + exit(1); +} + +static void parse_test_type(int argc, const char **argv) +{ + char *buf; + const char *token; + + if (argc == 1) { + /* Backwards compatibility */ + khugepaged_context = &__khugepaged_context; + madvise_context = &__madvise_context; + anon_ops = &__anon_ops; + return; + } + + buf = strdup(argv[1]); + token = strsep(&buf, ":"); + + if (!strcmp(token, "all")) { + khugepaged_context = &__khugepaged_context; + madvise_context = &__madvise_context; + } else if (!strcmp(token, "khugepaged")) { + khugepaged_context = &__khugepaged_context; + } else if (!strcmp(token, "madvise")) { + madvise_context = &__madvise_context; + } else { + usage(); + } + + if (!buf) + usage(); + + if (!strcmp(buf, "all")) { + file_ops = &__file_ops; + anon_ops = &__anon_ops; + shmem_ops = &__shmem_ops; + } else if (!strcmp(buf, "anon")) { + anon_ops = &__anon_ops; + } else if (!strcmp(buf, "file")) { + file_ops = &__file_ops; + } else if (!strcmp(buf, "shmem")) { + shmem_ops = &__shmem_ops; + } else { + usage(); + } + + if (!file_ops) + return; + + if (argc != 3) + usage(); +} + +int main(int argc, const char **argv) +{ + struct settings default_settings = { + .thp_enabled = THP_MADVISE, + .thp_defrag = THP_DEFRAG_ALWAYS, + .shmem_enabled = SHMEM_ADVISE, + .use_zero_page = 0, + .khugepaged = { + .defrag = 1, + .alloc_sleep_millisecs = 10, + .scan_sleep_millisecs = 10, + }, + /* + * When testing file-backed memory, the collapse path + * looks at how many pages are found in the page cache, not + * what pages are mapped. Disable read ahead optimization so + * pages don't find their way into the page cache unless + * we mem_ops->fault() them in. + */ + .read_ahead_kb = 0, + }; + + parse_test_type(argc, argv); + + if (file_ops) + get_finfo(argv[2]); + setbuf(stdout, NULL); page_size = getpagesize(); - hpage_pmd_size = read_num("hpage_pmd_size"); + hpage_pmd_size = read_pmd_pagesize(); hpage_pmd_nr = hpage_pmd_size / page_size; default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; @@ -1011,21 +1484,75 @@ int main(void) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; save_settings(); - adjust_settings(); + push_settings(&default_settings); alloc_at_fault(); - collapse_full(); - collapse_empty(); - collapse_single_pte_entry(); - collapse_max_ptes_none(); - collapse_swapin_single_pte(); - collapse_max_ptes_swap(); - collapse_single_pte_entry_compound(); - collapse_full_of_compound(); - collapse_compound_extreme(); - collapse_fork(); - collapse_fork_compound(); - collapse_max_ptes_shared(); + +#define TEST(t, c, o) do { \ + if (c && o) { \ + printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \ + t(c, o); \ + } \ + } while (0) + + TEST(collapse_full, khugepaged_context, anon_ops); + TEST(collapse_full, khugepaged_context, file_ops); + TEST(collapse_full, khugepaged_context, shmem_ops); + TEST(collapse_full, madvise_context, anon_ops); + TEST(collapse_full, madvise_context, file_ops); + TEST(collapse_full, madvise_context, shmem_ops); + + TEST(collapse_empty, khugepaged_context, anon_ops); + TEST(collapse_empty, madvise_context, anon_ops); + + TEST(collapse_single_pte_entry, khugepaged_context, anon_ops); + TEST(collapse_single_pte_entry, khugepaged_context, file_ops); + TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops); + TEST(collapse_single_pte_entry, madvise_context, anon_ops); + TEST(collapse_single_pte_entry, madvise_context, file_ops); + TEST(collapse_single_pte_entry, madvise_context, shmem_ops); + + TEST(collapse_max_ptes_none, khugepaged_context, anon_ops); + TEST(collapse_max_ptes_none, khugepaged_context, file_ops); + TEST(collapse_max_ptes_none, madvise_context, anon_ops); + TEST(collapse_max_ptes_none, madvise_context, file_ops); + + TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops); + TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops); + TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops); + TEST(collapse_single_pte_entry_compound, madvise_context, file_ops); + + TEST(collapse_full_of_compound, khugepaged_context, anon_ops); + TEST(collapse_full_of_compound, khugepaged_context, file_ops); + TEST(collapse_full_of_compound, khugepaged_context, shmem_ops); + TEST(collapse_full_of_compound, madvise_context, anon_ops); + TEST(collapse_full_of_compound, madvise_context, file_ops); + TEST(collapse_full_of_compound, madvise_context, shmem_ops); + + TEST(collapse_compound_extreme, khugepaged_context, anon_ops); + TEST(collapse_compound_extreme, madvise_context, anon_ops); + + TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops); + TEST(collapse_swapin_single_pte, madvise_context, anon_ops); + + TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops); + TEST(collapse_max_ptes_swap, madvise_context, anon_ops); + + TEST(collapse_fork, khugepaged_context, anon_ops); + TEST(collapse_fork, madvise_context, anon_ops); + + TEST(collapse_fork_compound, khugepaged_context, anon_ops); + TEST(collapse_fork_compound, madvise_context, anon_ops); + + TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops); + TEST(collapse_max_ptes_shared, madvise_context, anon_ops); + + TEST(madvise_collapse_existing_thps, madvise_context, anon_ops); + TEST(madvise_collapse_existing_thps, madvise_context, file_ops); + TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops); + + TEST(madvise_retracted_page_tables, madvise_context, file_ops); + TEST(madvise_retracted_page_tables, madvise_context, shmem_ops); restore_settings(0); } diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c index db0270127aeb..9496346973d4 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -119,6 +119,50 @@ static unsigned long long get_mmap_min_addr(void) } /* + * This test validates that merge is called when expanding a mapping. + * Mapping containing three pages is created, middle page is unmapped + * and then the mapping containing the first page is expanded so that + * it fills the created hole. The two parts should merge creating + * single mapping with three pages. + */ +static void mremap_expand_merge(unsigned long page_size) +{ + char *test_name = "mremap expand merge"; + FILE *fp; + char *line = NULL; + size_t len = 0; + bool success = false; + char *start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + munmap(start + page_size, page_size); + mremap(start, page_size, 2 * page_size, 0); + + fp = fopen("/proc/self/maps", "r"); + if (fp == NULL) { + ksft_test_result_fail("%s\n", test_name); + return; + } + + while (getline(&line, &len, fp) != -1) { + char *first = strtok(line, "- "); + void *first_val = (void *)strtol(first, NULL, 16); + char *second = strtok(NULL, "- "); + void *second_val = (void *) strtol(second, NULL, 16); + + if (first_val == start && second_val == start + 3 * page_size) { + success = true; + break; + } + } + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); + fclose(fp); +} + +/* * Returns the start address of the mapping on success, else returns * NULL on failure. */ @@ -336,6 +380,7 @@ int main(int argc, char **argv) int i, run_perf_tests; unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; unsigned int pattern_seed; + int num_expand_tests = 1; struct test test_cases[MAX_TEST]; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -407,12 +452,14 @@ int main(int argc, char **argv) (threshold_mb * _1MB >= _1GB); ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ? - ARRAY_SIZE(perf_test_cases) : 0)); + ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests); for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, pattern_seed); + mremap_expand_merge(page_size); + if (run_perf_tests) { ksft_print_msg("\n%s\n", "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index de86983b8a0f..e780e76c26b8 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -120,11 +120,16 @@ run_test ./gup_test -a # Dump pages 0, 19, and 4096, using pin_user_pages: run_test ./gup_test -ct -F 0x1 0 19 0x1000 -run_test ./userfaultfd anon 20 16 -# Test requires source and destination huge pages. Size of source -# (half_ufd_size_MB) is passed as argument to test. -run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32 -run_test ./userfaultfd shmem 20 16 +uffd_mods=("" ":dev") +for mod in "${uffd_mods[@]}"; do + run_test ./userfaultfd anon${mod} 20 16 + # Hugetlb tests require source and destination huge pages. Pass in half + # the size ($half_ufd_size_MB), which is used for *each*. + run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 + run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test + rm -f "$mnt"/uffd-test + run_test ./userfaultfd shmem${mod} 20 16 +done #cleanup umount "$mnt" diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c index e3a43f5d4fa2..21d8830c5f24 100644 --- a/tools/testing/selftests/vm/soft-dirty.c +++ b/tools/testing/selftests/vm/soft-dirty.c @@ -91,7 +91,7 @@ static void test_hugepage(int pagemap_fd, int pagesize) for (i = 0; i < hpage_len; i++) map[i] = (char)i; - if (check_huge(map)) { + if (check_huge_anon(map, 1, hpage_len)) { ksft_test_result_pass("Test %s huge page allocation\n", __func__); clear_softdirty(); diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 6aa2b8253aed..76e1c36dd9e5 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -92,7 +92,6 @@ void split_pmd_thp(void) { char *one_page; size_t len = 4 * pmd_pagesize; - uint64_t thp_size; size_t i; one_page = memalign(pmd_pagesize, len); @@ -107,8 +106,7 @@ void split_pmd_thp(void) for (i = 0; i < len; i++) one_page[i] = (char)i; - thp_size = check_huge(one_page); - if (!thp_size) { + if (!check_huge_anon(one_page, 1, pmd_pagesize)) { printf("No THP is allocated\n"); exit(EXIT_FAILURE); } @@ -124,9 +122,8 @@ void split_pmd_thp(void) } - thp_size = check_huge(one_page); - if (thp_size) { - printf("Still %ld kB AnonHugePages not split\n", thp_size); + if (check_huge_anon(one_page, 0, pmd_pagesize)) { + printf("Still AnonHugePages not split\n"); exit(EXIT_FAILURE); } @@ -172,8 +169,7 @@ void split_pte_mapped_thp(void) for (i = 0; i < len; i++) one_page[i] = (char)i; - thp_size = check_huge(one_page); - if (!thp_size) { + if (!check_huge_anon(one_page, 1, pmd_pagesize)) { printf("No THP is allocated\n"); exit(EXIT_FAILURE); } diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh index 539c9371e592..46e19b5d648d 100755 --- a/tools/testing/selftests/vm/test_hmm.sh +++ b/tools/testing/selftests/vm/test_hmm.sh @@ -52,21 +52,11 @@ load_driver() usage fi fi - if [ $? == 0 ]; then - major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices) - mknod /dev/hmm_dmirror0 c $major 0 - mknod /dev/hmm_dmirror1 c $major 1 - if [ $# -eq 2 ]; then - mknod /dev/hmm_dmirror2 c $major 2 - mknod /dev/hmm_dmirror3 c $major 3 - fi - fi } unload_driver() { modprobe -r $DRIVER > /dev/null 2>&1 - rm -f /dev/hmm_dmirror? } run_smoke() diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7c3f1b0ab468..297f250c1d95 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -61,10 +61,11 @@ #include <sys/random.h> #include "../kselftest.h" +#include "vm_util.h" #ifdef __NR_userfaultfd -static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size, hpage_size; #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -77,6 +78,13 @@ static int bounces; #define TEST_SHMEM 3 static int test_type; +#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY) + +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + +/* test using /dev/userfaultfd, instead of userfaultfd(2) */ +static bool test_dev_userfaultfd; + /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 static volatile bool test_uffdio_copy_eexist = true; @@ -92,9 +100,10 @@ static int huge_fd; static unsigned long long *count_verify; static int uffd = -1; static int uffd_flags, finished, *pipefd; -static char *area_src, *area_src_alias, *area_dst, *area_dst_alias; +static char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; static char *zeropage; pthread_attr_t attr; +static bool test_collapse; /* Userfaultfd test statistics */ struct uffd_stats { @@ -122,9 +131,13 @@ struct uffd_stats { #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +#define factor_of_2(x) ((x) ^ ((x) & ((x) - 1))) + const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" "./userfaultfd anon 100 99999\n\n" + "# Run the same anonymous memory test, but using /dev/userfaultfd:\n" + "./userfaultfd anon:dev 100 99999\n\n" "# Run share memory test on 1GiB region with 99 bounces:\n" "./userfaultfd shmem 1000 99\n\n" "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" @@ -141,6 +154,16 @@ static void usage(void) "[hugetlbfs_file]\n\n"); fprintf(stderr, "Supported <test type>: anon, hugetlb, " "hugetlb_shared, shmem\n\n"); + fprintf(stderr, "'Test mods' can be joined to the test type string with a ':'. " + "Supported mods:\n"); + fprintf(stderr, "\tsyscall - Use userfaultfd(2) (default)\n"); + fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n"); + fprintf(stderr, "\tcollapse - Test MADV_COLLAPSE of UFFDIO_REGISTER_MODE_MINOR\n" + "memory\n"); + fprintf(stderr, "\nExample test mod usage:\n"); + fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n"); + fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n"); + fprintf(stderr, "Examples:\n\n"); fprintf(stderr, "%s", examples); exit(1); @@ -154,12 +177,14 @@ static void usage(void) ret, __LINE__); \ } while (0) -#define err(fmt, ...) \ +#define errexit(exitcode, fmt, ...) \ do { \ _err(fmt, ##__VA_ARGS__); \ - exit(1); \ + exit(exitcode); \ } while (0) +#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) + static void uffd_stats_reset(struct uffd_stats *uffd_stats, unsigned long n_cpus) { @@ -212,12 +237,10 @@ static void anon_release_pages(char *rel_area) err("madvise(MADV_DONTNEED) failed"); } -static void anon_allocate_area(void **alloc_area) +static void anon_allocate_area(void **alloc_area, bool is_src) { *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (*alloc_area == MAP_FAILED) - err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) @@ -235,7 +258,7 @@ static void hugetlb_release_pages(char *rel_area) } } -static void hugetlb_allocate_area(void **alloc_area) +static void hugetlb_allocate_area(void **alloc_area, bool is_src) { void *area_alias = NULL; char **alloc_area_alias; @@ -245,7 +268,7 @@ static void hugetlb_allocate_area(void **alloc_area) nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | - (*alloc_area == area_src ? 0 : MAP_NORESERVE), + (is_src ? 0 : MAP_NORESERVE), -1, 0); else @@ -253,9 +276,9 @@ static void hugetlb_allocate_area(void **alloc_area) nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_SHARED | - (*alloc_area == area_src ? 0 : MAP_NORESERVE), + (is_src ? 0 : MAP_NORESERVE), huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + is_src ? 0 : nr_pages * page_size); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); @@ -265,12 +288,12 @@ static void hugetlb_allocate_area(void **alloc_area) PROT_READ | PROT_WRITE, MAP_SHARED, huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + is_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) err("mmap of hugetlb file alias failed"); } - if (*alloc_area == area_src) { + if (is_src) { alloc_area_alias = &area_src_alias; } else { alloc_area_alias = &area_dst_alias; @@ -293,21 +316,36 @@ static void shmem_release_pages(char *rel_area) err("madvise(MADV_REMOVE) failed"); } -static void shmem_allocate_area(void **alloc_area) +static void shmem_allocate_area(void **alloc_area, bool is_src) { void *area_alias = NULL; - bool is_src = alloc_area == (void **)&area_src; - unsigned long offset = is_src ? 0 : nr_pages * page_size; + size_t bytes = nr_pages * page_size; + unsigned long offset = is_src ? 0 : bytes; + char *p = NULL, *p_alias = NULL; + + if (test_collapse) { + p = BASE_PMD_ADDR; + if (!is_src) + /* src map + alias + interleaved hpages */ + p += 2 * (bytes + hpage_size); + p_alias = p; + p_alias += bytes; + p_alias += hpage_size; /* Prevent src/dst VMA merge */ + } - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, shm_fd, offset); + *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of memfd failed"); + if (test_collapse && *alloc_area != p) + err("mmap of memfd failed at %p", p); - area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, shm_fd, offset); + area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, offset); if (area_alias == MAP_FAILED) err("mmap of memfd alias failed"); + if (test_collapse && area_alias != p_alias) + err("mmap of anonymous memory failed at %p", p_alias); if (is_src) area_src_alias = area_alias; @@ -320,28 +358,39 @@ static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) *start = (unsigned long)area_dst_alias + offset; } +static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) +{ + if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, hpage_size)) + err("Did not find expected %d number of hugepages", + expect_nr_hpages); +} + struct uffd_test_ops { - void (*allocate_area)(void **alloc_area); + void (*allocate_area)(void **alloc_area, bool is_src); void (*release_pages)(char *rel_area); void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); + void (*check_pmd_mapping)(void *p, int expect_nr_hpages); }; static struct uffd_test_ops anon_uffd_test_ops = { .allocate_area = anon_allocate_area, .release_pages = anon_release_pages, .alias_mapping = noop_alias_mapping, + .check_pmd_mapping = NULL, }; static struct uffd_test_ops shmem_uffd_test_ops = { .allocate_area = shmem_allocate_area, .release_pages = shmem_release_pages, .alias_mapping = shmem_alias_mapping, + .check_pmd_mapping = shmem_check_pmd_mapping, }; static struct uffd_test_ops hugetlb_uffd_test_ops = { .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, .alias_mapping = hugetlb_alias_mapping, + .check_pmd_mapping = NULL, }; static struct uffd_test_ops *uffd_test_ops; @@ -383,13 +432,34 @@ static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls) } } +static int __userfaultfd_open_dev(void) +{ + int fd, _uffd; + + fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); + if (fd < 0) + errexit(KSFT_SKIP, "opening /dev/userfaultfd failed"); + + _uffd = ioctl(fd, USERFAULTFD_IOC_NEW, UFFD_FLAGS); + if (_uffd < 0) + errexit(errno == ENOTTY ? KSFT_SKIP : 1, + "creating userfaultfd failed"); + close(fd); + return _uffd; +} + static void userfaultfd_open(uint64_t *features) { struct uffdio_api uffdio_api; - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); - if (uffd < 0) - err("userfaultfd syscall not available in this kernel"); + if (test_dev_userfaultfd) + uffd = __userfaultfd_open_dev(); + else { + uffd = syscall(__NR_userfaultfd, UFFD_FLAGS); + if (uffd < 0) + errexit(errno == ENOSYS ? KSFT_SKIP : 1, + "creating userfaultfd failed"); + } uffd_flags = fcntl(uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; @@ -440,6 +510,7 @@ static void uffd_test_ctx_clear(void) munmap_area((void **)&area_src_alias); munmap_area((void **)&area_dst); munmap_area((void **)&area_dst_alias); + munmap_area((void **)&area_remap); } static void uffd_test_ctx_init(uint64_t features) @@ -448,8 +519,8 @@ static void uffd_test_ctx_init(uint64_t features) uffd_test_ctx_clear(); - uffd_test_ops->allocate_area((void **)&area_src); - uffd_test_ops->allocate_area((void **)&area_dst); + uffd_test_ops->allocate_area((void **)&area_src, true); + uffd_test_ops->allocate_area((void **)&area_dst, false); userfaultfd_open(&features); @@ -703,7 +774,27 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, continue_range(uffd, msg->arg.pagefault.address, page_size); stats->minor_faults++; } else { - /* Missing page faults */ + /* + * Missing page faults. + * + * Here we force a write check for each of the missing mode + * faults. It's guaranteed because the only threads that + * will trigger uffd faults are the locking threads, and + * their first instruction to touch the missing page will + * always be pthread_mutex_lock(). + * + * Note that here we relied on an NPTL glibc impl detail to + * always read the lock type at the entry of the lock op + * (pthread_mutex_t.__data.__type, offset 0x10) before + * doing any locking operations to guarantee that. It's + * actually not good to rely on this impl detail because + * logically a pthread-compatible lib can implement the + * locks without types and we can fail when linking with + * them. However since we used to find bugs with this + * strict check we still keep it around. Hopefully this + * could be a good hint when it fails again. If one day + * it'll break on some other impl of glibc we'll revisit. + */ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) err("unexpected write fault"); @@ -766,6 +857,7 @@ static void *uffd_poll_thread(void *arg) err("remove failure"); break; case UFFD_EVENT_REMAP: + area_remap = area_dst; /* save for later unmap */ area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } @@ -1218,13 +1310,30 @@ static int userfaultfd_sig_test(void) return userfaults != 0; } +void check_memory_contents(char *p) +{ + unsigned long i; + uint8_t expected_byte; + void *expected_page; + + if (posix_memalign(&expected_page, page_size, page_size)) + err("out of memory"); + + for (i = 0; i < nr_pages; ++i) { + expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); + memset(expected_page, expected_byte, page_size); + if (my_bcmp(expected_page, p + (i * page_size), page_size)) + err("unexpected page contents after minor fault"); + } + + free(expected_page); +} + static int userfaultfd_minor_test(void) { - struct uffdio_register uffdio_register; unsigned long p; + struct uffdio_register uffdio_register; pthread_t uffd_mon; - uint8_t expected_byte; - void *expected_page; char c; struct uffd_stats stats = { 0 }; @@ -1263,17 +1372,7 @@ static int userfaultfd_minor_test(void) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - - if (posix_memalign(&expected_page, page_size, page_size)) - err("out of memory"); - - for (p = 0; p < nr_pages; ++p) { - expected_byte = ~((uint8_t)(p % ((uint8_t)-1))); - memset(expected_page, expected_byte, page_size); - if (my_bcmp(expected_page, area_dst_alias + (p * page_size), - page_size)) - err("unexpected page contents after minor fault"); - } + check_memory_contents(area_dst_alias); if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); @@ -1282,6 +1381,23 @@ static int userfaultfd_minor_test(void) uffd_stats_report(&stats, 1); + if (test_collapse) { + printf("testing collapse of uffd memory into PMD-mapped THPs:"); + if (madvise(area_dst_alias, nr_pages * page_size, + MADV_COLLAPSE)) + err("madvise(MADV_COLLAPSE)"); + + uffd_test_ops->check_pmd_mapping(area_dst, + nr_pages * page_size / + hpage_size); + /* + * This won't cause uffd-fault - it purely just makes sure there + * was no corruption. + */ + check_memory_contents(area_dst_alias); + printf(" done.\n"); + } + return stats.missing_faults != 0 || stats.minor_faults != nr_pages; } @@ -1584,8 +1700,6 @@ unsigned long default_huge_page_size(void) static void set_test_type(const char *type) { - uint64_t features = UFFD_API_FEATURES; - if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; @@ -1603,12 +1717,37 @@ static void set_test_type(const char *type) test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; test_uffdio_minor = true; - } else { - err("Unknown test type: %s", type); } +} + +static void parse_test_type_arg(const char *raw_type) +{ + char *buf = strdup(raw_type); + uint64_t features = UFFD_API_FEATURES; + + while (buf) { + const char *token = strsep(&buf, ":"); + + if (!test_type) + set_test_type(token); + else if (!strcmp(token, "dev")) + test_dev_userfaultfd = true; + else if (!strcmp(token, "syscall")) + test_dev_userfaultfd = false; + else if (!strcmp(token, "collapse")) + test_collapse = true; + else + err("unrecognized test mod '%s'", token); + } + + if (!test_type) + err("failed to parse test type argument: '%s'", raw_type); + + if (test_collapse && test_type != TEST_SHMEM) + err("Unsupported test: %s", raw_type); if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + page_size = hpage_size; else page_size = sysconf(_SC_PAGE_SIZE); @@ -1646,6 +1785,8 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + size_t bytes; + if (argc < 4) usage(); @@ -1653,11 +1794,41 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - set_test_type(argv[1]); + hpage_size = default_huge_page_size(); + parse_test_type_arg(argv[1]); + bytes = atol(argv[2]) * 1024 * 1024; + + if (test_collapse && bytes & (hpage_size - 1)) + err("MiB must be multiple of %lu if :collapse mod set", + hpage_size >> 20); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / - nr_cpus; + + if (test_collapse) { + /* nr_cpus must divide (bytes / page_size), otherwise, + * area allocations of (nr_pages * paze_size) won't be a + * multiple of hpage_size, even if bytes is a multiple of + * hpage_size. + * + * This means that nr_cpus must divide (N * (2 << (H-P)) + * where: + * bytes = hpage_size * N + * hpage_size = 2 << H + * page_size = 2 << P + * + * And we want to chose nr_cpus to be the largest value + * satisfying this constraint, not larger than the number + * of online CPUs. Unfortunately, prime factorization of + * N and nr_cpus may be arbitrary, so have to search for it. + * Instead, just use the highest power of 2 dividing both + * nr_cpus and (bytes / page_size). + */ + int x = factor_of_2(nr_cpus); + int y = factor_of_2(bytes / page_size); + + nr_cpus = x < y ? x : y; + } + nr_pages_per_cpu = bytes / page_size / nr_cpus; if (!nr_pages_per_cpu) { _err("invalid MiB"); usage(); diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index b58ab11a7a30..f11f8adda521 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -42,9 +42,9 @@ void clear_softdirty(void) ksft_exit_fail_msg("writing clear_refs failed\n"); } -static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len) { - while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + while (fgets(buf, len, fp)) { if (!strncmp(buf, pattern, strlen(pattern))) return true; } @@ -72,9 +72,10 @@ uint64_t read_pmd_pagesize(void) return strtoul(buf, NULL, 10); } -uint64_t check_huge(void *addr) +bool __check_huge(void *addr, char *pattern, int nr_hpages, + uint64_t hpage_size) { - uint64_t thp = 0; + uint64_t thp = -1; int ret; FILE *fp; char buffer[MAX_LINE_LENGTH]; @@ -89,20 +90,37 @@ uint64_t check_huge(void *addr) if (!fp) ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); - if (!check_for_pattern(fp, addr_pattern, buffer)) + if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) goto err_out; /* - * Fetch the AnonHugePages: in the same block and check the number of + * Fetch the pattern in the same block and check the number of * hugepages. */ - if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) goto err_out; - if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) + snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); + + if (sscanf(buffer, addr_pattern, &thp) != 1) ksft_exit_fail_msg("Reading smap error\n"); err_out: fclose(fp); - return thp; + return thp == (nr_hpages * (hpage_size >> 10)); +} + +bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size) +{ + return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size); +} + +bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size) +{ + return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size); +} + +bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size) +{ + return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size); } diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 2e512bd57ae1..5c35de454e08 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -5,5 +5,8 @@ uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); void clear_softdirty(void); +bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); -uint64_t check_huge(void *addr); +bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size); +bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size); +bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); |