summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/blk-cgroup.h5
-rw-r--r--include/linux/blk_types.h2
-rw-r--r--include/linux/fs.h31
-rw-r--r--include/linux/fsverity.h2
-rw-r--r--include/linux/kvm_host.h60
-rw-r--r--include/linux/kvm_types.h11
-rw-r--r--include/linux/net.h19
-rw-r--r--include/linux/nvme.h1
-rw-r--r--include/linux/pagemap.h2
-rw-r--r--include/linux/sbitmap.h2
-rw-r--r--include/linux/seq_file.h1
-rw-r--r--include/linux/user_events.h116
-rw-r--r--include/linux/xarray.h1
13 files changed, 195 insertions, 58 deletions
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index f2ad8ed8f777..652cd05b0924 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -95,7 +95,10 @@ struct blkcg_gq {
spinlock_t async_bio_lock;
struct bio_list async_bios;
- struct work_struct async_bio_work;
+ union {
+ struct work_struct async_bio_work;
+ struct work_struct free_work;
+ };
atomic_t use_delay;
atomic64_t delay_nsec;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dd0763a1c674..1973ef9bd40f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -85,8 +85,10 @@ struct block_device {
*/
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
+typedef u32 blk_short_t;
#else
typedef u8 __bitwise blk_status_t;
+typedef u16 blk_short_t;
#endif
#define BLK_STS_OK 0
#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 183160872133..bbde95387a23 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -275,7 +275,6 @@ enum positive_aop_returns {
AOP_TRUNCATED_PAGE = 0x80001,
};
-#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */
#define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct
* helper code (eg buffer layer)
* to clear GFP_FS from alloc */
@@ -338,28 +337,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
return kiocb->ki_complete == NULL;
}
-/*
- * "descriptor" for what we're up to with a read.
- * This allows us to use the same read code yet
- * have multiple different users of the data that
- * we read from a file.
- *
- * The simplest case just copies the data to user
- * mode.
- */
-typedef struct {
- size_t written;
- size_t count;
- union {
- char __user *buf;
- void *data;
- } arg;
- int error;
-} read_descriptor_t;
-
-typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
- unsigned long, unsigned long);
-
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
@@ -370,12 +347,6 @@ struct address_space_operations {
/* Mark a folio dirty. Return true if this dirtied it */
bool (*dirty_folio)(struct address_space *, struct folio *);
- /*
- * Reads in the requested pages. Unlike ->readpage(), this is
- * PURELY used for read-ahead!.
- */
- int (*readpages)(struct file *filp, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages);
void (*readahead)(struct readahead_control *);
int (*write_begin)(struct file *, struct address_space *mapping,
@@ -3027,7 +2998,7 @@ extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
-extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
+ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags);
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index b568b3c7d095..a7afc800bd8d 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -221,7 +221,7 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work)
*
* This checks whether ->i_verity_info has been set.
*
- * Filesystems call this from ->readpages() to check whether the pages need to
+ * Filesystems call this from ->readahead() to check whether the pages need to
* be verified or not. Don't use IS_VERITY() for this purpose; it's subject to
* a race condition where the file is being read concurrently with
* FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9536ffa0473b..3f9b22c4983a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -148,6 +148,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_MASK GENMASK(7,0)
#define KVM_REQUEST_NO_WAKEUP BIT(8)
#define KVM_REQUEST_WAIT BIT(9)
+#define KVM_REQUEST_NO_ACTION BIT(10)
/*
* Architecture-independent vcpu->requests bit members
* Bits 4-7 are reserved for more arch-independent bits.
@@ -156,9 +157,18 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
-#define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQUEST_ARCH_BASE 8
+/*
+ * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to
+ * OUTSIDE_GUEST_MODE. KVM_REQ_OUTSIDE_GUEST_MODE differs from a vCPU "kick"
+ * in that it ensures the vCPU has reached OUTSIDE_GUEST_MODE before continuing
+ * on. A kick only guarantees that the vCPU is on its way out, e.g. a previous
+ * kick may have set vcpu->mode to EXITING_GUEST_MODE, and so there's no
+ * guarantee the vCPU received an IPI and has actually exited guest mode.
+ */
+#define KVM_REQ_OUTSIDE_GUEST_MODE (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
@@ -1221,27 +1231,27 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
* @gpc: struct gfn_to_pfn_cache object.
* @vcpu: vCPU to be used for marking pages dirty and to be woken on
* invalidation.
- * @guest_uses_pa: indicates that the resulting host physical PFN is used while
- * @vcpu is IN_GUEST_MODE so invalidations should wake it.
- * @kernel_map: requests a kernel virtual mapping (kmap / memremap).
+ * @usage: indicates if the resulting host physical PFN is used while
+ * the @vcpu is IN_GUEST_MODE (in which case invalidation of
+ * the cache from MMU notifiers---but not for KVM memslot
+ * changes!---will also force @vcpu to exit the guest and
+ * refresh the cache); and/or if the PFN used directly
+ * by KVM (and thus needs a kernel virtual mapping).
* @gpa: guest physical address to map.
* @len: sanity check; the range being access must fit a single page.
- * @dirty: mark the cache dirty immediately.
*
* @return: 0 for success.
* -EINVAL for a mapping which would cross a page boundary.
* -EFAULT for an untranslatable guest physical address.
*
* This primes a gfn_to_pfn_cache and links it into the @kvm's list for
- * invalidations to be processed. Invalidation callbacks to @vcpu using
- * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM
- * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check()
- * to ensure that the cache is valid before accessing the target page.
+ * invalidations to be processed. Callers are required to use
+ * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
+ * accessing the target page.
*/
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
- struct kvm_vcpu *vcpu, bool guest_uses_pa,
- bool kernel_map, gpa_t gpa, unsigned long len,
- bool dirty);
+ struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+ gpa_t gpa, unsigned long len);
/**
* kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
@@ -1250,7 +1260,6 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* @gpc: struct gfn_to_pfn_cache object.
* @gpa: current guest physical address to map.
* @len: sanity check; the range being access must fit a single page.
- * @dirty: mark the cache dirty immediately.
*
* @return: %true if the cache is still valid and the address matches.
* %false if the cache is not valid.
@@ -1272,7 +1281,6 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* @gpc: struct gfn_to_pfn_cache object.
* @gpa: updated guest physical address to map.
* @len: sanity check; the range being access must fit a single page.
- * @dirty: mark the cache dirty immediately.
*
* @return: 0 for success.
* -EINVAL for a mapping which would cross a page boundary.
@@ -1285,7 +1293,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* with the lock still held to permit access.
*/
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
- gpa_t gpa, unsigned long len, bool dirty);
+ gpa_t gpa, unsigned long len);
/**
* kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
@@ -1293,10 +1301,9 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* @kvm: pointer to kvm instance.
* @gpc: struct gfn_to_pfn_cache object.
*
- * This unmaps the referenced page and marks it dirty, if appropriate. The
- * cache is left in the invalid state but at least the mapping from GPA to
- * userspace HVA will remain cached and can be reused on a subsequent
- * refresh.
+ * This unmaps the referenced page. The cache is left in the invalid state
+ * but at least the mapping from GPA to userspace HVA will remain cached
+ * and can be reused on a subsequent refresh.
*/
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
@@ -1984,7 +1991,7 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
void kvm_arch_irq_routing_update(struct kvm *kvm);
-static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
+static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
/*
* Ensure the rest of the request is published to kvm_check_request's
@@ -1994,6 +2001,19 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
}
+static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
+{
+ /*
+ * Request that don't require vCPU action should never be logged in
+ * vcpu->requests. The vCPU won't clear the request, so it will stay
+ * logged indefinitely and prevent the vCPU from entering the guest.
+ */
+ BUILD_BUG_ON(!__builtin_constant_p(req) ||
+ (req & KVM_REQUEST_NO_ACTION));
+
+ __kvm_make_request(req, vcpu);
+}
+
static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
{
return READ_ONCE(vcpu->requests);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index dceac12c1ce5..ac1ebb37a0ff 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -18,6 +18,7 @@ struct kvm_memslots;
enum kvm_mr_change;
+#include <linux/bits.h>
#include <linux/types.h>
#include <linux/spinlock_types.h>
@@ -46,6 +47,12 @@ typedef u64 hfn_t;
typedef hfn_t kvm_pfn_t;
+enum pfn_cache_usage {
+ KVM_GUEST_USES_PFN = BIT(0),
+ KVM_HOST_USES_PFN = BIT(1),
+ KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN,
+};
+
struct gfn_to_hva_cache {
u64 generation;
gpa_t gpa;
@@ -64,11 +71,9 @@ struct gfn_to_pfn_cache {
rwlock_t lock;
void *khva;
kvm_pfn_t pfn;
+ enum pfn_cache_usage usage;
bool active;
bool valid;
- bool dirty;
- bool kernel_map;
- bool guest_uses_pa;
};
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
diff --git a/include/linux/net.h b/include/linux/net.h
index ba736b457a06..12093f4db50c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -125,6 +125,25 @@ struct socket {
struct socket_wq wq;
};
+/*
+ * "descriptor" for what we're up to with a read.
+ * This allows us to use the same read code yet
+ * have multiple different users of the data that
+ * we read from a file.
+ *
+ * The simplest case just copies the data to user
+ * mode.
+ */
+typedef struct {
+ size_t written;
+ size_t count;
+ union {
+ char __user *buf;
+ void *data;
+ } arg;
+ int error;
+} read_descriptor_t;
+
struct vm_area_struct;
struct page;
struct sockaddr;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 4f44f83817a9..f626a445d1a8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -346,6 +346,7 @@ enum {
NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
NVME_CTRL_VWC_PRESENT = 1 << 0,
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
+ NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3,
NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a8d0b327b066..993994cd943a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -752,8 +752,6 @@ struct page *read_cache_page(struct address_space *, pgoff_t index,
filler_t *filler, void *data);
extern struct page * read_cache_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
-extern int read_cache_pages(struct address_space *mapping,
- struct list_head *pages, filler_t *filler, void *data);
static inline struct page *read_mapping_page(struct address_space *mapping,
pgoff_t index, struct file *file)
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index dffeb8281c2d..8f5a86e210b9 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -174,7 +174,7 @@ static inline unsigned int __map_depth(const struct sbitmap *sb, int index)
static inline void sbitmap_free(struct sbitmap *sb)
{
free_percpu(sb->alloc_hint);
- kfree(sb->map);
+ kvfree(sb->map);
sb->map = NULL;
}
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 88cc16444b43..60820ab511d2 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -162,6 +162,7 @@ int seq_dentry(struct seq_file *, struct dentry *, const char *);
int seq_path_root(struct seq_file *m, const struct path *path,
const struct path *root, const char *esc);
+void *single_start(struct seq_file *, loff_t *);
int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t);
int single_release(struct inode *, struct file *);
diff --git a/include/linux/user_events.h b/include/linux/user_events.h
new file mode 100644
index 000000000000..e570840571e1
--- /dev/null
+++ b/include/linux/user_events.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2021, Microsoft Corporation.
+ *
+ * Authors:
+ * Beau Belgrave <beaub@linux.microsoft.com>
+ */
+#ifndef _UAPI_LINUX_USER_EVENTS_H
+#define _UAPI_LINUX_USER_EVENTS_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifdef __KERNEL__
+#include <linux/uio.h>
+#else
+#include <sys/uio.h>
+#endif
+
+#define USER_EVENTS_SYSTEM "user_events"
+#define USER_EVENTS_PREFIX "u:"
+
+/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
+#define EVENT_BIT_FTRACE 0
+#define EVENT_BIT_PERF 1
+#define EVENT_BIT_OTHER 7
+
+#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
+#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
+#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
+
+/* Create dynamic location entry within a 32-bit value */
+#define DYN_LOC(offset, size) ((size) << 16 | (offset))
+
+/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */
+#define FLAG_BPF_ITER (1 << 0)
+
+/*
+ * Describes an event registration and stores the results of the registration.
+ * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum
+ * must set the size and name_args before invocation.
+ */
+struct user_reg {
+
+ /* Input: Size of the user_reg structure being used */
+ __u32 size;
+
+ /* Input: Pointer to string with event name, description and flags */
+ __u64 name_args;
+
+ /* Output: Byte index of the event within the status page */
+ __u32 status_index;
+
+ /* Output: Index of the event to use when writing data */
+ __u32 write_index;
+};
+
+#define DIAG_IOC_MAGIC '*'
+
+/* Requests to register a user_event */
+#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*)
+
+/* Requests to delete a user_event */
+#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*)
+
+/* Data type that was passed to the BPF program */
+enum {
+ /* Data resides in kernel space */
+ USER_BPF_DATA_KERNEL,
+
+ /* Data resides in user space */
+ USER_BPF_DATA_USER,
+
+ /* Data is a pointer to a user_bpf_iter structure */
+ USER_BPF_DATA_ITER,
+};
+
+/*
+ * Describes an iovec iterator that BPF programs can use to access data for
+ * a given user_event write() / writev() call.
+ */
+struct user_bpf_iter {
+
+ /* Offset of the data within the first iovec */
+ __u32 iov_offset;
+
+ /* Number of iovec structures */
+ __u32 nr_segs;
+
+ /* Pointer to iovec structures */
+ const struct iovec *iov;
+};
+
+/* Context that BPF programs receive when attached to a user_event */
+struct user_bpf_context {
+
+ /* Data type being passed (see union below) */
+ __u32 data_type;
+
+ /* Length of the data */
+ __u32 data_len;
+
+ /* Pointer to data, varies by data type */
+ union {
+ /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */
+ void *kdata;
+
+ /* User data (data_type == USER_BPF_DATA_USER) */
+ void *udata;
+
+ /* Direct iovec (data_type == USER_BPF_DATA_ITER) */
+ struct user_bpf_iter *iter;
+ };
+};
+
+#endif /* _UAPI_LINUX_USER_EVENTS_H */
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index bb52b786be1b..72feab5ea8d4 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -9,6 +9,7 @@
* See Documentation/core-api/xarray.rst for how to use the XArray.
*/
+#include <linux/bitmap.h>
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/gfp.h>