summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 17:07:39 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 17:07:39 -0800
commit239451e90355be68130410ef8fadef8cd130a35d (patch)
tree711be8a57429b79790ffee0e266ef4bf70b3e34d
parentb8878e5a5c62a16ad491ba664a9c1efff62e9a99 (diff)
parent4ecc96cba8d93d86abf46d17067e9b4c96241a29 (diff)
Merge tag 'for-linus-6.3-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross: - help deprecate the /proc/xen files by making the related information available via sysfs - mark the Xen variants of play_dead "noreturn" - support a shared Xen platform interrupt - several small cleanups and fixes * tag 'for-linus-6.3-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: xen: sysfs: make kobj_type structure constant x86/Xen: drop leftover VM-assist uses xen: Replace one-element array with flexible-array member xen/grant-dma-iommu: Implement a dummy probe_device() callback xen/pvcalls-back: fix permanently masked event channel xen: Allow platform PCI interrupt to be shared x86/xen/time: prefer tsc as clocksource when it is invariant x86/xen: mark xen_pv_play_dead() as __noreturn x86/xen: don't let xen_pv_play_dead() return drivers/xen/hypervisor: Expose Xen SIF flags to userspace
-rw-r--r--Documentation/ABI/stable/sysfs-hypervisor-xen13
-rw-r--r--arch/x86/xen/setup.c4
-rw-r--r--arch/x86/xen/smp.h2
-rw-r--r--arch/x86/xen/smp_pv.c17
-rw-r--r--arch/x86/xen/time.c38
-rw-r--r--arch/x86/xen/xen-head.S7
-rw-r--r--drivers/xen/events/events_base.c9
-rw-r--r--drivers/xen/grant-dma-iommu.c11
-rw-r--r--drivers/xen/platform-pci.c5
-rw-r--r--drivers/xen/pvcalls-back.c3
-rw-r--r--drivers/xen/sys-hypervisor.c71
-rw-r--r--drivers/xen/xen-front-pgdir-shbuf.c2
-rw-r--r--include/xen/events.h2
-rw-r--r--tools/objtool/check.c1
14 files changed, 151 insertions, 34 deletions
diff --git a/Documentation/ABI/stable/sysfs-hypervisor-xen b/Documentation/ABI/stable/sysfs-hypervisor-xen
index 748593c64568..be9ca9981bb1 100644
--- a/Documentation/ABI/stable/sysfs-hypervisor-xen
+++ b/Documentation/ABI/stable/sysfs-hypervisor-xen
@@ -120,3 +120,16 @@ Contact: xen-devel@lists.xenproject.org
Description: If running under Xen:
The Xen version is in the format <major>.<minor><extra>
This is the <minor> part of it.
+
+What: /sys/hypervisor/start_flags/*
+Date: March 2023
+KernelVersion: 6.3.0
+Contact: xen-devel@lists.xenproject.org
+Description: If running under Xen:
+ All bits in Xen's start-flags are represented as
+ boolean files, returning '1' if set, '0' otherwise.
+ This takes the place of the defunct /proc/xen/capabilities,
+ which would contain "control_d" on dom0, and be empty
+ otherwise. This flag is now exposed as "initdomain" in
+ addition to the "privileged" flag; all other possible flags
+ are accessible as "unknownXX".
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8db26f10fb1d..c2be3efb2ba0 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -934,12 +934,8 @@ void xen_enable_syscall(void)
static void __init xen_pvmmu_arch_setup(void)
{
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
-
if (register_callback(CALLBACKTYPE_event,
xen_asm_exc_xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index bd02f9d50107..22fb982ff971 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -21,6 +21,8 @@ void xen_smp_send_reschedule(int cpu);
void xen_smp_send_call_function_ipi(const struct cpumask *mask);
void xen_smp_send_call_function_single_ipi(int cpu);
+void __noreturn xen_cpu_bringup_again(unsigned long stack);
+
struct xen_common_irq {
int irq;
char *name;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 6175f2c5c822..a9cf8c8fa074 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -381,21 +381,12 @@ static void xen_pv_cpu_die(unsigned int cpu)
}
}
-static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
+static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
- cpu_bringup();
- /*
- * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
- * clears certain data that the cpu_idle loop (which called us
- * and that we return from) expects. The only way to get that
- * data back is to call:
- */
- tick_nohz_idle_enter();
- tick_nohz_idle_stop_tick_protected();
-
- cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
+ xen_cpu_bringup_again((unsigned long)task_pt_regs(current));
+ BUG();
}
#else /* !CONFIG_HOTPLUG_CPU */
@@ -409,7 +400,7 @@ static void xen_pv_cpu_die(unsigned int cpu)
BUG();
}
-static void xen_pv_play_dead(void)
+static void __noreturn xen_pv_play_dead(void)
{
BUG();
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 6b8836deb738..1d597364b49d 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -482,15 +482,51 @@ static void xen_setup_vsyscall_time_info(void)
xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
+/*
+ * Check if it is possible to safely use the tsc as a clocksource. This is
+ * only true if the hypervisor notifies the guest that its tsc is invariant,
+ * the tsc is stable, and the tsc instruction will never be emulated.
+ */
+static int __init xen_tsc_safe_clocksource(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)))
+ return 0;
+
+ if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC)))
+ return 0;
+
+ if (check_tsc_unstable())
+ return 0;
+
+ /* Leaf 4, sub-leaf 0 (0x40000x03) */
+ cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
+
+ /* tsc_mode = no_emulate (2) */
+ if (ebx != 2)
+ return 0;
+
+ return 1;
+}
+
static void __init xen_time_init(void)
{
struct pvclock_vcpu_time_info *pvti;
int cpu = smp_processor_id();
struct timespec64 tp;
- /* As Dom0 is never moved, no penalty on using TSC there */
+ /*
+ * As Dom0 is never moved, no penalty on using TSC there.
+ *
+ * If it is possible for the guest to determine that the tsc is a safe
+ * clocksource, then set xen_clocksource rating below that of the tsc
+ * so that the system prefers tsc instead.
+ */
if (xen_initial_domain())
xen_clocksource.rating = 275;
+ else if (xen_tsc_safe_clocksource())
+ xen_clocksource.rating = 299;
clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index ffaa62167f6e..e36ea4268bd2 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -76,6 +76,13 @@ SYM_CODE_START(asm_cpu_bringup_and_idle)
call cpu_bringup_and_idle
SYM_CODE_END(asm_cpu_bringup_and_idle)
+
+SYM_CODE_START(xen_cpu_bringup_again)
+ UNWIND_HINT_FUNC
+ mov %rdi, %rsp
+ UNWIND_HINT_REGS
+ call cpu_bringup_and_idle
+SYM_CODE_END(xen_cpu_bringup_again)
.popsection
#endif
#endif
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index c443f04aaad7..c7715f8bd452 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1710,9 +1710,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
generic_handle_irq(irq);
}
-static void __xen_evtchn_do_upcall(void)
+static int __xen_evtchn_do_upcall(void)
{
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+ int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE;
int cpu = smp_processor_id();
struct evtchn_loop_ctrl ctrl = { 0 };
@@ -1737,6 +1738,8 @@ static void __xen_evtchn_do_upcall(void)
* above.
*/
__this_cpu_inc(irq_epoch);
+
+ return ret;
}
void xen_evtchn_do_upcall(struct pt_regs *regs)
@@ -1751,9 +1754,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
set_irq_regs(old_regs);
}
-void xen_hvm_evtchn_do_upcall(void)
+int xen_hvm_evtchn_do_upcall(void)
{
- __xen_evtchn_do_upcall();
+ return __xen_evtchn_do_upcall();
}
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
diff --git a/drivers/xen/grant-dma-iommu.c b/drivers/xen/grant-dma-iommu.c
index 16b8bc0c0b33..6a9fe02c6bfc 100644
--- a/drivers/xen/grant-dma-iommu.c
+++ b/drivers/xen/grant-dma-iommu.c
@@ -16,8 +16,15 @@ struct grant_dma_iommu_device {
struct iommu_device iommu;
};
-/* Nothing is really needed here */
-static const struct iommu_ops grant_dma_iommu_ops;
+static struct iommu_device *grant_dma_iommu_probe_device(struct device *dev)
+{
+ return ERR_PTR(-ENODEV);
+}
+
+/* Nothing is really needed here except a dummy probe_device callback */
+static const struct iommu_ops grant_dma_iommu_ops = {
+ .probe_device = grant_dma_iommu_probe_device,
+};
static const struct of_device_id grant_dma_iommu_of_match[] = {
{ .compatible = "xen,grant-dma" },
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index cd07e3fed0fa..fcc819131572 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -64,14 +64,13 @@ static uint64_t get_callback_via(struct pci_dev *pdev)
static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
{
- xen_hvm_evtchn_do_upcall();
- return IRQ_HANDLED;
+ return xen_hvm_evtchn_do_upcall();
}
static int xen_allocate_irq(struct pci_dev *pdev)
{
return request_irq(pdev->irq, do_hvm_evtchn_intr,
- IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
+ IRQF_NOBALANCING | IRQF_SHARED,
"xen-platform-pci", pdev);
}
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 0d4f8f4f4948..7a464a541d91 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -173,6 +173,8 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map)
RING_IDX cons, prod, size, array_size;
int ret;
+ atomic_set(&map->write, 0);
+
cons = intf->out_cons;
prod = intf->out_prod;
/* read the indexes before dealing with the data */
@@ -197,7 +199,6 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map)
iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 2, size);
}
- atomic_set(&map->write, 0);
ret = inet_sendmsg(map->sock, &msg, size);
if (ret == -EAGAIN) {
atomic_inc(&map->write);
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index fcb0792f090e..2f880374b463 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -31,7 +31,10 @@ struct hyp_sysfs_attr {
struct attribute attr;
ssize_t (*show)(struct hyp_sysfs_attr *, char *);
ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
- void *hyp_attr_data;
+ union {
+ void *hyp_attr_data;
+ unsigned long hyp_attr_value;
+ };
};
static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
@@ -399,6 +402,60 @@ static int __init xen_sysfs_properties_init(void)
return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
}
+#define FLAG_UNAME "unknown"
+#define FLAG_UNAME_FMT FLAG_UNAME "%02u"
+#define FLAG_UNAME_MAX sizeof(FLAG_UNAME "XX")
+#define FLAG_COUNT (sizeof(xen_start_flags) * BITS_PER_BYTE)
+static_assert(sizeof(xen_start_flags) <=
+ sizeof_field(struct hyp_sysfs_attr, hyp_attr_value));
+
+static ssize_t flag_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+ char *p = buffer;
+
+ *p++ = '0' + ((xen_start_flags & attr->hyp_attr_value) != 0);
+ *p++ = '\n';
+ return p - buffer;
+}
+
+#define FLAG_NODE(flag, node) \
+ [ilog2(flag)] = { \
+ .attr = { .name = #node, .mode = 0444 },\
+ .show = flag_show, \
+ .hyp_attr_value = flag \
+ }
+
+/*
+ * Add new, known flags here. No other changes are required, but
+ * note that each known flag wastes one entry in flag_unames[].
+ * The code/complexity machinations to avoid this isn't worth it
+ * for a few entries, but keep it in mind.
+ */
+static struct hyp_sysfs_attr flag_attrs[FLAG_COUNT] = {
+ FLAG_NODE(SIF_PRIVILEGED, privileged),
+ FLAG_NODE(SIF_INITDOMAIN, initdomain)
+};
+static struct attribute_group xen_flags_group = {
+ .name = "start_flags",
+ .attrs = (struct attribute *[FLAG_COUNT + 1]){}
+};
+static char flag_unames[FLAG_COUNT][FLAG_UNAME_MAX];
+
+static int __init xen_sysfs_flags_init(void)
+{
+ for (unsigned fnum = 0; fnum != FLAG_COUNT; fnum++) {
+ if (likely(flag_attrs[fnum].attr.name == NULL)) {
+ sprintf(flag_unames[fnum], FLAG_UNAME_FMT, fnum);
+ flag_attrs[fnum].attr.name = flag_unames[fnum];
+ flag_attrs[fnum].attr.mode = 0444;
+ flag_attrs[fnum].show = flag_show;
+ flag_attrs[fnum].hyp_attr_value = 1 << fnum;
+ }
+ xen_flags_group.attrs[fnum] = &flag_attrs[fnum].attr;
+ }
+ return sysfs_create_group(hypervisor_kobj, &xen_flags_group);
+}
+
#ifdef CONFIG_XEN_HAVE_VPMU
struct pmu_mode {
const char *name;
@@ -539,18 +596,22 @@ static int __init hyper_sysfs_init(void)
ret = xen_sysfs_properties_init();
if (ret)
goto prop_out;
+ ret = xen_sysfs_flags_init();
+ if (ret)
+ goto flags_out;
#ifdef CONFIG_XEN_HAVE_VPMU
if (xen_initial_domain()) {
ret = xen_sysfs_pmu_init();
if (ret) {
- sysfs_remove_group(hypervisor_kobj,
- &xen_properties_group);
- goto prop_out;
+ sysfs_remove_group(hypervisor_kobj, &xen_flags_group);
+ goto flags_out;
}
}
#endif
goto out;
+flags_out:
+ sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
prop_out:
sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
uuid_out:
@@ -594,7 +655,7 @@ static const struct sysfs_ops hyp_sysfs_ops = {
.store = hyp_sysfs_store,
};
-static struct kobj_type hyp_sysfs_kobj_type = {
+static const struct kobj_type hyp_sysfs_kobj_type = {
.sysfs_ops = &hyp_sysfs_ops,
};
diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c
index 5c0b5cb5b419..b52e0fa595a9 100644
--- a/drivers/xen/xen-front-pgdir-shbuf.c
+++ b/drivers/xen/xen-front-pgdir-shbuf.c
@@ -30,7 +30,7 @@
struct xen_page_directory {
grant_ref_t gref_dir_next_page;
#define XEN_GREF_LIST_END 0
- grant_ref_t gref[1]; /* Variable length */
+ grant_ref_t gref[]; /* Variable length */
};
/**
diff --git a/include/xen/events.h b/include/xen/events.h
index 344081e71584..44c2855c76d1 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -107,7 +107,7 @@ evtchn_port_t evtchn_from_irq(unsigned irq);
int xen_set_callback_via(uint64_t via);
void xen_evtchn_do_upcall(struct pt_regs *regs);
-void xen_hvm_evtchn_do_upcall(void);
+int xen_hvm_evtchn_do_upcall(void);
/* Bind a pirq for a physical interrupt to an irq. */
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b118f588cd2b..94b518c12f9a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -186,6 +186,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"snp_abort",
"stop_this_cpu",
"usercopy_abort",
+ "xen_cpu_bringup_again",
"xen_start_kernel",
};