diff options
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/blk-cgroup.h | 5 | ||||
-rw-r--r-- | include/linux/blk_types.h | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 31 | ||||
-rw-r--r-- | include/linux/fsverity.h | 2 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 60 | ||||
-rw-r--r-- | include/linux/kvm_types.h | 11 | ||||
-rw-r--r-- | include/linux/net.h | 19 | ||||
-rw-r--r-- | include/linux/nvme.h | 1 | ||||
-rw-r--r-- | include/linux/pagemap.h | 2 | ||||
-rw-r--r-- | include/linux/sbitmap.h | 2 | ||||
-rw-r--r-- | include/linux/seq_file.h | 1 | ||||
-rw-r--r-- | include/linux/user_events.h | 116 | ||||
-rw-r--r-- | include/linux/xarray.h | 1 |
13 files changed, 195 insertions, 58 deletions
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f2ad8ed8f777..652cd05b0924 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -95,7 +95,10 @@ struct blkcg_gq { spinlock_t async_bio_lock; struct bio_list async_bios; - struct work_struct async_bio_work; + union { + struct work_struct async_bio_work; + struct work_struct free_work; + }; atomic_t use_delay; atomic64_t delay_nsec; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dd0763a1c674..1973ef9bd40f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -85,8 +85,10 @@ struct block_device { */ #if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__) typedef u32 __bitwise blk_status_t; +typedef u32 blk_short_t; #else typedef u8 __bitwise blk_status_t; +typedef u16 blk_short_t; #endif #define BLK_STS_OK 0 #define BLK_STS_NOTSUPP ((__force blk_status_t)1) diff --git a/include/linux/fs.h b/include/linux/fs.h index 183160872133..bbde95387a23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -275,7 +275,6 @@ enum positive_aop_returns { AOP_TRUNCATED_PAGE = 0x80001, }; -#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */ #define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct * helper code (eg buffer layer) * to clear GFP_FS from alloc */ @@ -338,28 +337,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } -/* - * "descriptor" for what we're up to with a read. - * This allows us to use the same read code yet - * have multiple different users of the data that - * we read from a file. - * - * The simplest case just copies the data to user - * mode. - */ -typedef struct { - size_t written; - size_t count; - union { - char __user *buf; - void *data; - } arg; - int error; -} read_descriptor_t; - -typedef int (*read_actor_t)(read_descriptor_t *, struct page *, - unsigned long, unsigned long); - struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); @@ -370,12 +347,6 @@ struct address_space_operations { /* Mark a folio dirty. Return true if this dirtied it */ bool (*dirty_folio)(struct address_space *, struct folio *); - /* - * Reads in the requested pages. Unlike ->readpage(), this is - * PURELY used for read-ahead!. - */ - int (*readpages)(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages); void (*readahead)(struct readahead_control *); int (*write_begin)(struct file *, struct address_space *mapping, @@ -3027,7 +2998,7 @@ extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); -extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); +ssize_t generic_perform_write(struct kiocb *, struct iov_iter *); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index b568b3c7d095..a7afc800bd8d 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -221,7 +221,7 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) * * This checks whether ->i_verity_info has been set. * - * Filesystems call this from ->readpages() to check whether the pages need to + * Filesystems call this from ->readahead() to check whether the pages need to * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to * a race condition where the file is being read concurrently with * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9536ffa0473b..3f9b22c4983a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -148,6 +148,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQUEST_MASK GENMASK(7,0) #define KVM_REQUEST_NO_WAKEUP BIT(8) #define KVM_REQUEST_WAIT BIT(9) +#define KVM_REQUEST_NO_ACTION BIT(10) /* * Architecture-independent vcpu->requests bit members * Bits 4-7 are reserved for more arch-independent bits. @@ -156,9 +157,18 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_UNHALT 3 -#define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQUEST_ARCH_BASE 8 +/* + * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to + * OUTSIDE_GUEST_MODE. KVM_REQ_OUTSIDE_GUEST_MODE differs from a vCPU "kick" + * in that it ensures the vCPU has reached OUTSIDE_GUEST_MODE before continuing + * on. A kick only guarantees that the vCPU is on its way out, e.g. a previous + * kick may have set vcpu->mode to EXITING_GUEST_MODE, and so there's no + * guarantee the vCPU received an IPI and has actually exited guest mode. + */ +#define KVM_REQ_OUTSIDE_GUEST_MODE (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) + #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ @@ -1221,27 +1231,27 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * @gpc: struct gfn_to_pfn_cache object. * @vcpu: vCPU to be used for marking pages dirty and to be woken on * invalidation. - * @guest_uses_pa: indicates that the resulting host physical PFN is used while - * @vcpu is IN_GUEST_MODE so invalidations should wake it. - * @kernel_map: requests a kernel virtual mapping (kmap / memremap). + * @usage: indicates if the resulting host physical PFN is used while + * the @vcpu is IN_GUEST_MODE (in which case invalidation of + * the cache from MMU notifiers---but not for KVM memslot + * changes!---will also force @vcpu to exit the guest and + * refresh the cache); and/or if the PFN used directly + * by KVM (and thus needs a kernel virtual mapping). * @gpa: guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * @dirty: mark the cache dirty immediately. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. * * This primes a gfn_to_pfn_cache and links it into the @kvm's list for - * invalidations to be processed. Invalidation callbacks to @vcpu using - * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM - * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check() - * to ensure that the cache is valid before accessing the target page. + * invalidations to be processed. Callers are required to use + * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before + * accessing the target page. */ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - struct kvm_vcpu *vcpu, bool guest_uses_pa, - bool kernel_map, gpa_t gpa, unsigned long len, - bool dirty); + struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, + gpa_t gpa, unsigned long len); /** * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. @@ -1250,7 +1260,6 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * @gpc: struct gfn_to_pfn_cache object. * @gpa: current guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * @dirty: mark the cache dirty immediately. * * @return: %true if the cache is still valid and the address matches. * %false if the cache is not valid. @@ -1272,7 +1281,6 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * @gpc: struct gfn_to_pfn_cache object. * @gpa: updated guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * @dirty: mark the cache dirty immediately. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. @@ -1285,7 +1293,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * with the lock still held to permit access. */ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len, bool dirty); + gpa_t gpa, unsigned long len); /** * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache. @@ -1293,10 +1301,9 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * - * This unmaps the referenced page and marks it dirty, if appropriate. The - * cache is left in the invalid state but at least the mapping from GPA to - * userspace HVA will remain cached and can be reused on a subsequent - * refresh. + * This unmaps the referenced page. The cache is left in the invalid state + * but at least the mapping from GPA to userspace HVA will remain cached + * and can be reused on a subsequent refresh. */ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); @@ -1984,7 +1991,7 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) void kvm_arch_irq_routing_update(struct kvm *kvm); -static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) +static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* * Ensure the rest of the request is published to kvm_check_request's @@ -1994,6 +2001,19 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } +static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) +{ + /* + * Request that don't require vCPU action should never be logged in + * vcpu->requests. The vCPU won't clear the request, so it will stay + * logged indefinitely and prevent the vCPU from entering the guest. + */ + BUILD_BUG_ON(!__builtin_constant_p(req) || + (req & KVM_REQUEST_NO_ACTION)); + + __kvm_make_request(req, vcpu); +} + static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { return READ_ONCE(vcpu->requests); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index dceac12c1ce5..ac1ebb37a0ff 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -18,6 +18,7 @@ struct kvm_memslots; enum kvm_mr_change; +#include <linux/bits.h> #include <linux/types.h> #include <linux/spinlock_types.h> @@ -46,6 +47,12 @@ typedef u64 hfn_t; typedef hfn_t kvm_pfn_t; +enum pfn_cache_usage { + KVM_GUEST_USES_PFN = BIT(0), + KVM_HOST_USES_PFN = BIT(1), + KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN, +}; + struct gfn_to_hva_cache { u64 generation; gpa_t gpa; @@ -64,11 +71,9 @@ struct gfn_to_pfn_cache { rwlock_t lock; void *khva; kvm_pfn_t pfn; + enum pfn_cache_usage usage; bool active; bool valid; - bool dirty; - bool kernel_map; - bool guest_uses_pa; }; #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE diff --git a/include/linux/net.h b/include/linux/net.h index ba736b457a06..12093f4db50c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -125,6 +125,25 @@ struct socket { struct socket_wq wq; }; +/* + * "descriptor" for what we're up to with a read. + * This allows us to use the same read code yet + * have multiple different users of the data that + * we read from a file. + * + * The simplest case just copies the data to user + * mode. + */ +typedef struct { + size_t written; + size_t count; + union { + char __user *buf; + void *data; + } arg; + int error; +} read_descriptor_t; + struct vm_area_struct; struct page; struct sockaddr; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4f44f83817a9..f626a445d1a8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -346,6 +346,7 @@ enum { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a8d0b327b066..993994cd943a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -752,8 +752,6 @@ struct page *read_cache_page(struct address_space *, pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -extern int read_cache_pages(struct address_space *mapping, - struct list_head *pages, filler_t *filler, void *data); static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, struct file *file) diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index dffeb8281c2d..8f5a86e210b9 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -174,7 +174,7 @@ static inline unsigned int __map_depth(const struct sbitmap *sb, int index) static inline void sbitmap_free(struct sbitmap *sb) { free_percpu(sb->alloc_hint); - kfree(sb->map); + kvfree(sb->map); sb->map = NULL; } diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 88cc16444b43..60820ab511d2 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -162,6 +162,7 @@ int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); +void *single_start(struct seq_file *, loff_t *); int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); int single_release(struct inode *, struct file *); diff --git a/include/linux/user_events.h b/include/linux/user_events.h new file mode 100644 index 000000000000..e570840571e1 --- /dev/null +++ b/include/linux/user_events.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, Microsoft Corporation. + * + * Authors: + * Beau Belgrave <beaub@linux.microsoft.com> + */ +#ifndef _UAPI_LINUX_USER_EVENTS_H +#define _UAPI_LINUX_USER_EVENTS_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +#ifdef __KERNEL__ +#include <linux/uio.h> +#else +#include <sys/uio.h> +#endif + +#define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_PREFIX "u:" + +/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ +#define EVENT_BIT_FTRACE 0 +#define EVENT_BIT_PERF 1 +#define EVENT_BIT_OTHER 7 + +#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) +#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) +#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) + +/* Create dynamic location entry within a 32-bit value */ +#define DYN_LOC(offset, size) ((size) << 16 | (offset)) + +/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ +#define FLAG_BPF_ITER (1 << 0) + +/* + * Describes an event registration and stores the results of the registration. + * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum + * must set the size and name_args before invocation. + */ +struct user_reg { + + /* Input: Size of the user_reg structure being used */ + __u32 size; + + /* Input: Pointer to string with event name, description and flags */ + __u64 name_args; + + /* Output: Byte index of the event within the status page */ + __u32 status_index; + + /* Output: Index of the event to use when writing data */ + __u32 write_index; +}; + +#define DIAG_IOC_MAGIC '*' + +/* Requests to register a user_event */ +#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) + +/* Requests to delete a user_event */ +#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) + +/* Data type that was passed to the BPF program */ +enum { + /* Data resides in kernel space */ + USER_BPF_DATA_KERNEL, + + /* Data resides in user space */ + USER_BPF_DATA_USER, + + /* Data is a pointer to a user_bpf_iter structure */ + USER_BPF_DATA_ITER, +}; + +/* + * Describes an iovec iterator that BPF programs can use to access data for + * a given user_event write() / writev() call. + */ +struct user_bpf_iter { + + /* Offset of the data within the first iovec */ + __u32 iov_offset; + + /* Number of iovec structures */ + __u32 nr_segs; + + /* Pointer to iovec structures */ + const struct iovec *iov; +}; + +/* Context that BPF programs receive when attached to a user_event */ +struct user_bpf_context { + + /* Data type being passed (see union below) */ + __u32 data_type; + + /* Length of the data */ + __u32 data_len; + + /* Pointer to data, varies by data type */ + union { + /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ + void *kdata; + + /* User data (data_type == USER_BPF_DATA_USER) */ + void *udata; + + /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ + struct user_bpf_iter *iter; + }; +}; + +#endif /* _UAPI_LINUX_USER_EVENTS_H */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index bb52b786be1b..72feab5ea8d4 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -9,6 +9,7 @@ * See Documentation/core-api/xarray.rst for how to use the XArray. */ +#include <linux/bitmap.h> #include <linux/bug.h> #include <linux/compiler.h> #include <linux/gfp.h> |