summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-19 08:48:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-19 08:48:09 -0700
commit0815d5cc7dfb4a2c6d02a6eb86974ab3992b803d (patch)
tree2116699a753b2cef5951e29676cbfde423b44003 /arch
parentb3603fcb79b1036acae10602bffc4855a4b9af80 (diff)
parentd277f9d82802223f242cd9b60c988cfdda1d6be0 (diff)
Merge tag 'for-linus-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross: - Xen event channel handling fix for a regression with a rare kernel config and some added hardening - better support of running Xen dom0 in PVH mode - a cleanup for the xen grant-dma-iommu driver * tag 'for-linus-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: xen/events: increment refcnt only if event channel is refcounted xen/evtchn: avoid WARN() when unbinding an event channel x86/xen: attempt to inflate the memory balloon on PVH xen/grant-dma-iommu: Convert to platform remove callback returning void
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h5
-rw-r--r--arch/x86/platform/pvh/enlighten.c3
-rw-r--r--arch/x86/xen/enlighten.c32
-rw-r--r--arch/x86/xen/enlighten_pvh.c68
-rw-r--r--arch/x86/xen/setup.c44
-rw-r--r--arch/x86/xen/xen-ops.h14
6 files changed, 122 insertions, 44 deletions
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index a9088250770f..64fbd2dbc5b7 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,6 +62,11 @@ void xen_arch_unregister_cpu(int num);
#ifdef CONFIG_PVH
void __init xen_pvh_init(struct boot_params *boot_params);
void __init mem_map_via_hcall(struct boot_params *boot_params_p);
+#ifdef CONFIG_XEN_PVH
+void __init xen_reserve_extra_memory(struct boot_params *bootp);
+#else
+static inline void xen_reserve_extra_memory(struct boot_params *bootp) { }
+#endif
#endif
/* Lazy mode for batching updates / context switch */
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index 944e0290f2c0..8c2d4b8de25d 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -75,6 +75,9 @@ static void __init init_pvh_bootparams(bool xen_guest)
} else
xen_raw_printk("Warning: Can fit ISA range into e820\n");
+ if (xen_guest)
+ xen_reserve_extra_memory(&pvh_bootparams);
+
pvh_bootparams.hdr.cmd_line_ptr =
pvh_start_info.cmdline_paddr;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3c61bb98c10e..a01ca255b0c6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -6,6 +6,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/kexec.h>
+#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/panic_notifier.h>
@@ -350,3 +351,34 @@ void xen_arch_unregister_cpu(int num)
}
EXPORT_SYMBOL(xen_arch_unregister_cpu);
#endif
+
+/* Amount of extra memory space we add to the e820 ranges */
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns)
+{
+ unsigned int i;
+
+ /*
+ * No need to check for zero size, should happen rarely and will only
+ * write a new entry regarded to be unused due to zero size.
+ */
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+ /* Add new region. */
+ if (xen_extra_mem[i].n_pfns == 0) {
+ xen_extra_mem[i].start_pfn = start_pfn;
+ xen_extra_mem[i].n_pfns = n_pfns;
+ break;
+ }
+ /* Append to existing region. */
+ if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
+ start_pfn) {
+ xen_extra_mem[i].n_pfns += n_pfns;
+ break;
+ }
+ }
+ if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+ printk(KERN_WARNING "Warning: not enough extra memory regions\n");
+
+ memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
+}
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 9e9db601bd52..27a2a02ef8fb 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/acpi.h>
#include <linux/export.h>
+#include <linux/mm.h>
#include <xen/hvc-console.h>
@@ -73,3 +74,70 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p)
}
boot_params_p->e820_entries = memmap.nr_entries;
}
+
+/*
+ * Reserve e820 UNUSABLE regions to inflate the memory balloon.
+ *
+ * On PVH dom0 the host memory map is used, RAM regions available to dom0 are
+ * located as the same place as in the native memory map, but since dom0 gets
+ * less memory than the total amount of host RAM the ranges that can't be
+ * populated are converted from RAM -> UNUSABLE. Use such regions (up to the
+ * ratio signaled in EXTRA_MEM_RATIO) in order to inflate the balloon driver at
+ * boot. Doing so prevents the guest (even if just temporary) from using holes
+ * in the memory map in order to map grants or foreign addresses, and
+ * hopefully limits the risk of a clash with a device MMIO region. Ideally the
+ * hypervisor should notify us which memory ranges are suitable for creating
+ * foreign mappings, but that's not yet implemented.
+ */
+void __init xen_reserve_extra_memory(struct boot_params *bootp)
+{
+ unsigned int i, ram_pages = 0, extra_pages;
+
+ for (i = 0; i < bootp->e820_entries; i++) {
+ struct boot_e820_entry *e = &bootp->e820_table[i];
+
+ if (e->type != E820_TYPE_RAM)
+ continue;
+ ram_pages += PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr);
+ }
+
+ /* Max amount of extra memory. */
+ extra_pages = EXTRA_MEM_RATIO * ram_pages;
+
+ /*
+ * Convert UNUSABLE ranges to RAM and reserve them for foreign mapping
+ * purposes.
+ */
+ for (i = 0; i < bootp->e820_entries && extra_pages; i++) {
+ struct boot_e820_entry *e = &bootp->e820_table[i];
+ unsigned long pages;
+
+ if (e->type != E820_TYPE_UNUSABLE)
+ continue;
+
+ pages = min(extra_pages,
+ PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr));
+
+ if (pages != (PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr))) {
+ struct boot_e820_entry *next;
+
+ if (bootp->e820_entries ==
+ ARRAY_SIZE(bootp->e820_table))
+ /* No space left to split - skip region. */
+ continue;
+
+ /* Split entry. */
+ next = e + 1;
+ memmove(next, e,
+ (bootp->e820_entries - i) * sizeof(*e));
+ bootp->e820_entries++;
+ next->addr = PAGE_ALIGN(e->addr) + PFN_PHYS(pages);
+ e->size = next->addr - e->addr;
+ next->size -= e->size;
+ }
+ e->type = E820_TYPE_RAM;
+ extra_pages -= pages;
+
+ xen_add_extra_mem(PFN_UP(e->addr), pages);
+ }
+}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b3e37961065a..380591028cb8 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -38,9 +38,6 @@
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
-/* Amount of extra memory space we add to the e820 ranges */
-struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
-
/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;
@@ -64,18 +61,6 @@ static struct {
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
-/*
- * The maximum amount of extra memory compared to the base size. The
- * main scaling factor is the size of struct page. At extreme ratios
- * of base:extra, all the base memory can be filled with page
- * structures for the extra memory, leaving no space for anything
- * else.
- *
- * 10x seems like a reasonable balance between scaling flexibility and
- * leaving a practically usable system.
- */
-#define EXTRA_MEM_RATIO (10)
-
static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
static void __init xen_parse_512gb(void)
@@ -96,35 +81,6 @@ static void __init xen_parse_512gb(void)
xen_512gb_limit = val;
}
-static void __init xen_add_extra_mem(unsigned long start_pfn,
- unsigned long n_pfns)
-{
- int i;
-
- /*
- * No need to check for zero size, should happen rarely and will only
- * write a new entry regarded to be unused due to zero size.
- */
- for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
- /* Add new region. */
- if (xen_extra_mem[i].n_pfns == 0) {
- xen_extra_mem[i].start_pfn = start_pfn;
- xen_extra_mem[i].n_pfns = n_pfns;
- break;
- }
- /* Append to existing region. */
- if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
- start_pfn) {
- xen_extra_mem[i].n_pfns += n_pfns;
- break;
- }
- }
- if (i == XEN_EXTRA_MEM_MAX_REGIONS)
- printk(KERN_WARNING "Warning: not enough extra memory regions\n");
-
- memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
-}
-
static void __init xen_del_extra_mem(unsigned long start_pfn,
unsigned long n_pfns)
{
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index a87ab36889e7..79cf93f2c92f 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -163,4 +163,18 @@ void xen_hvm_post_suspend(int suspend_cancelled);
static inline void xen_hvm_post_suspend(int suspend_cancelled) {}
#endif
+/*
+ * The maximum amount of extra memory compared to the base size. The
+ * main scaling factor is the size of struct page. At extreme ratios
+ * of base:extra, all the base memory can be filled with page
+ * structures for the extra memory, leaving no space for anything
+ * else.
+ *
+ * 10x seems like a reasonable balance between scaling flexibility and
+ * leaving a practically usable system.
+ */
+#define EXTRA_MEM_RATIO (10)
+
+void xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns);
+
#endif /* XEN_OPS_H */