diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-17 17:56:44 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-17 17:56:44 -0800 |
commit | 345b17acb1aa7a443741d9220f66b30d5ddd7c39 (patch) | |
tree | 8f75bc2b08540b47fc8c66a262f8add6bbad70eb /arch | |
parent | 787fec8ac15cc693b9a7bc1b4a338b92483d993c (diff) | |
parent | 1fb1abc83636f5329c26cd29f0f19f3faeb697a5 (diff) |
Merge tag 'for-linus-5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger:
- IRQ handling cleanups
- Support for suspend
- Various fixes for UML specific drivers: ubd, vector, xterm
* tag 'for-linus-5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml: (32 commits)
um: Fix build w/o CONFIG_PM_SLEEP
um: time-travel: Correct time event IRQ delivery
um: irq/sigio: Support suspend/resume handling of workaround IRQs
um: time-travel: Actually apply "free-until" optimisation
um: chan_xterm: Fix fd leak
um: tty: Fix handling of close in tty lines
um: Monitor error events in IRQ controller
um: allocate a guard page to helper threads
um: support some of ARCH_HAS_SET_MEMORY
um: time-travel: avoid multiple identical propagations
um: Fetch registers only for signals which need them
um: Support suspend to RAM
um: Allow PM with suspend-to-idle
um: time: Fix read_persistent_clock64() in time-travel
um: Simplify os_idle_sleep() and sleep longer
um: Simplify IRQ handling code
um: Remove IRQ_NONE type
um: irq: Reduce irq_reg allocation
um: irq: Clean up and rename struct irq_fd
um: Clean up alarm IRQ chip name
...
Diffstat (limited to 'arch')
35 files changed, 897 insertions, 588 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 43333e36e0ba..34d302d1a07f 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -15,6 +15,7 @@ config UML select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE select NO_DMA + select ARCH_HAS_SET_MEMORY select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select HAVE_GCC_PLUGINS @@ -191,3 +192,8 @@ config UML_TIME_TRAVEL_SUPPORT endmenu source "arch/um/drivers/Kconfig" + +config ARCH_SUSPEND_POSSIBLE + def_bool y + +source "kernel/power/Kconfig" diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 4d80526a4236..d8845d4aac6a 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -26,10 +26,10 @@ int generic_read(int fd, char *c_out, void *unused) n = read(fd, c_out, sizeof(*c_out)); if (n > 0) return n; - else if (errno == EAGAIN) - return 0; else if (n == 0) return -EIO; + else if (errno == EAGAIN) + return 0; return -errno; } diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 14ad9f495fe6..1c70a31e7c5b 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -262,19 +262,25 @@ static irqreturn_t line_write_interrupt(int irq, void *data) int line_setup_irq(int fd, int input, int output, struct line *line, void *data) { const struct line_driver *driver = line->driver; - int err = 0; + int err; - if (input) + if (input) { err = um_request_irq(driver->read_irq, fd, IRQ_READ, line_interrupt, IRQF_SHARED, driver->read_irq_name, data); - if (err) - return err; - if (output) + if (err < 0) + return err; + } + + if (output) { err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, line_write_interrupt, IRQF_SHARED, driver->write_irq_name, data); - return err; + if (err < 0) + return err; + } + + return 0; } static int line_activate(struct tty_port *port, struct tty_struct *tty) @@ -608,7 +614,6 @@ static void free_winch(struct winch *winch) winch->fd = -1; if (fd != -1) os_close_file(fd); - list_del(&winch->list); __free_winch(&winch->work); } @@ -709,6 +714,8 @@ static void unregister_winch(struct tty_struct *tty) winch = list_entry(ele, struct winch, list); wtty = tty_port_tty_get(winch->port); if (wtty == tty) { + list_del(&winch->list); + spin_unlock(&winch_handler_lock); free_winch(winch); break; } @@ -719,14 +726,17 @@ static void unregister_winch(struct tty_struct *tty) static void winch_cleanup(void) { - struct list_head *ele, *next; struct winch *winch; spin_lock(&winch_handler_lock); + while ((winch = list_first_entry_or_null(&winch_handlers, + struct winch, list))) { + list_del(&winch->list); + spin_unlock(&winch_handler_lock); - list_for_each_safe(ele, next, &winch_handlers) { - winch = list_entry(ele, struct winch, list); free_winch(winch); + + spin_lock(&winch_handler_lock); } spin_unlock(&winch_handler_lock); diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index a2e680f7d39f..6d00af25ec6b 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -738,7 +738,7 @@ static int __init mconsole_init(void) err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt, IRQF_SHARED, "mconsole", (void *)sock); - if (err) { + if (err < 0) { printk(KERN_ERR "Failed to get IRQ for management console\n"); goto out; } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 1802cf4ef5a5..2fc0b038ff8a 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -160,7 +160,7 @@ static int uml_net_open(struct net_device *dev) err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, IRQF_SHARED, dev->name, dev); - if (err != 0) { + if (err < 0) { printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); err = -ENETUNREACH; goto out_close; diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index a47ca5376d9d..efa8b7304090 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -100,7 +100,7 @@ static int port_accept(struct port_list *port) .port = port }); if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt, - IRQF_SHARED, "telnetd", conn)) { + IRQF_SHARED, "telnetd", conn) < 0) { printk(KERN_ERR "port_accept : failed to get IRQ for " "telnetd\n"); goto out_free; @@ -182,7 +182,7 @@ void *port_data(int port_num) } if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt, - IRQF_SHARED, "port", port)) { + IRQF_SHARED, "port", port) < 0) { printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); goto out_close; } diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index ce115fce52f0..433a3f8f2ef3 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -11,6 +11,7 @@ #include <linux/fs.h> #include <linux/interrupt.h> #include <linux/miscdevice.h> +#include <linux/hw_random.h> #include <linux/delay.h> #include <linux/uaccess.h> #include <init.h> @@ -18,9 +19,8 @@ #include <os.h> /* - * core module and version information + * core module information */ -#define RNG_VERSION "1.0.0" #define RNG_MODULE_NAME "hw_random" /* Changed at init time, in the non-modular case, and at module load @@ -28,88 +28,36 @@ * protects against a module being loaded twice at the same time. */ static int random_fd = -1; -static DECLARE_WAIT_QUEUE_HEAD(host_read_wait); +static struct hwrng hwrng = { 0, }; +static DECLARE_COMPLETION(have_data); -static int rng_dev_open (struct inode *inode, struct file *filp) +static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block) { - /* enforce read-only access to this chrdev */ - if ((filp->f_mode & FMODE_READ) == 0) - return -EINVAL; - if ((filp->f_mode & FMODE_WRITE) != 0) - return -EINVAL; + int ret; - return 0; -} - -static atomic_t host_sleep_count = ATOMIC_INIT(0); - -static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, - loff_t *offp) -{ - u32 data; - int n, ret = 0, have_data; - - while (size) { - n = os_read_file(random_fd, &data, sizeof(data)); - if (n > 0) { - have_data = n; - while (have_data && size) { - if (put_user((u8) data, buf++)) { - ret = ret ? : -EFAULT; - break; - } - size--; - ret++; - have_data--; - data >>= 8; - } - } - else if (n == -EAGAIN) { - DECLARE_WAITQUEUE(wait, current); - - if (filp->f_flags & O_NONBLOCK) - return ret ? : -EAGAIN; - - atomic_inc(&host_sleep_count); + for (;;) { + ret = os_read_file(random_fd, buf, max); + if (block && ret == -EAGAIN) { add_sigio_fd(random_fd); - add_wait_queue(&host_read_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); + ret = wait_for_completion_killable(&have_data); - schedule(); - remove_wait_queue(&host_read_wait, &wait); + ignore_sigio_fd(random_fd); + deactivate_fd(random_fd, RANDOM_IRQ); - if (atomic_dec_and_test(&host_sleep_count)) { - ignore_sigio_fd(random_fd); - deactivate_fd(random_fd, RANDOM_IRQ); - } + if (ret < 0) + break; + } else { + break; } - else - return n; - - if (signal_pending (current)) - return ret ? : -ERESTARTSYS; } - return ret; -} - -static const struct file_operations rng_chrdev_ops = { - .owner = THIS_MODULE, - .open = rng_dev_open, - .read = rng_dev_read, - .llseek = noop_llseek, -}; -/* rng_init shouldn't be called more than once at boot time */ -static struct miscdevice rng_miscdev = { - HWRNG_MINOR, - RNG_MODULE_NAME, - &rng_chrdev_ops, -}; + return ret != -EAGAIN ? ret : 0; +} static irqreturn_t random_interrupt(int irq, void *data) { - wake_up(&host_read_wait); + complete(&have_data); return IRQ_HANDLED; } @@ -126,18 +74,19 @@ static int __init rng_init (void) goto out; random_fd = err; - err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt, 0, "random", NULL); - if (err) + if (err < 0) goto err_out_cleanup_hw; - sigio_broken(random_fd, 1); + sigio_broken(random_fd); + hwrng.name = RNG_MODULE_NAME; + hwrng.read = rng_dev_read; + hwrng.quality = 1024; - err = misc_register (&rng_miscdev); + err = hwrng_register(&hwrng); if (err) { - printk (KERN_ERR RNG_MODULE_NAME ": misc device register " - "failed\n"); + pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err); goto err_out_cleanup_hw; } out: @@ -161,8 +110,8 @@ static void cleanup(void) static void __exit rng_cleanup(void) { + hwrng_unregister(&hwrng); os_close_file(random_fd); - misc_deregister (&rng_miscdev); } module_init (rng_init); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index eae8c83364f7..13b1fe694b90 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -47,18 +47,25 @@ /* Max request size is determined by sector mask - 32K */ #define UBD_MAX_REQUEST (8 * sizeof(long)) +struct io_desc { + char *buffer; + unsigned long length; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; +}; + struct io_thread_req { struct request *req; int fds[2]; unsigned long offsets[2]; unsigned long long offset; - unsigned long length; - char *buffer; int sectorsize; - unsigned long sector_mask; - unsigned long long cow_offset; - unsigned long bitmap_words[2]; int error; + + int desc_cnt; + /* io_desc has to be the last element of the struct */ + struct io_desc io_desc[]; }; @@ -148,6 +155,7 @@ struct ubd { /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; + char *serial; int count; int fd; __u64 size; @@ -173,6 +181,7 @@ struct ubd { #define DEFAULT_UBD { \ .file = NULL, \ + .serial = NULL, \ .count = 0, \ .fd = -1, \ .size = -1, \ @@ -265,7 +274,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) { struct ubd *ubd_dev; struct openflags flags = global_openflags; - char *backing_file; + char *file, *backing_file, *serial; int n, err = 0, i; if(index_out) *index_out = -1; @@ -361,24 +370,27 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) goto out; break_loop: - backing_file = strchr(str, ','); + file = strsep(&str, ",:"); + if (*file == '\0') + file = NULL; - if (backing_file == NULL) - backing_file = strchr(str, ':'); + backing_file = strsep(&str, ",:"); + if (*backing_file == '\0') + backing_file = NULL; - if(backing_file != NULL){ - if(ubd_dev->no_cow){ - *error_out = "Can't specify both 'd' and a cow file"; - goto out; - } - else { - *backing_file = '\0'; - backing_file++; - } + serial = strsep(&str, ",:"); + if (*serial == '\0') + serial = NULL; + + if (backing_file && ubd_dev->no_cow) { + *error_out = "Can't specify both 'd' and a cow file"; + goto out; } + err = 0; - ubd_dev->file = str; + ubd_dev->file = file; ubd_dev->cow.file = backing_file; + ubd_dev->serial = serial; ubd_dev->boot_openflags = flags; out: mutex_unlock(&ubd_lock); @@ -399,7 +411,7 @@ static int ubd_setup(char *str) __setup("ubd", ubd_setup); __uml_help(ubd_setup, -"ubd<n><flags>=<filename>[(:|,)<filename2>]\n" +"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n" " This is used to associate a device with a file in the underlying\n" " filesystem. When specifying two filenames, the first one is the\n" " COW name and the second is the backing file name. As separator you can\n" @@ -422,6 +434,12 @@ __uml_help(ubd_setup, " UMLs and file locking will be turned off - this is appropriate for a\n" " cluster filesystem and inappropriate at almost all other times.\n\n" " 't' will disable trim/discard support on the device (enabled by default).\n\n" +" An optional device serial number can be exposed using the serial parameter\n" +" on the cmdline which is exposed as a sysfs entry. This is particularly\n" +" useful when a unique number should be given to the device. Note when\n" +" specifying a label, the filename2 must be also presented. It can be\n" +" an empty string, in which case the backing file is not used:\n" +" ubd0=File,,Serial\n" ); static int udb_setup(char *str) @@ -525,12 +543,7 @@ static void ubd_handler(void) blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); } - if ((io_req->error) || (io_req->buffer == NULL)) - blk_mq_end_request(io_req->req, io_req->error); - else { - if (!blk_update_request(io_req->req, io_req->error, io_req->length)) - __blk_mq_end_request(io_req->req, io_req->error); - } + blk_mq_end_request(io_req->req, io_req->error); kfree(io_req); } } @@ -866,6 +879,41 @@ static void ubd_device_release(struct device *dev) *ubd_dev = ((struct ubd) DEFAULT_UBD); } +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct ubd *ubd_dev = disk->private_data; + + if (!ubd_dev) + return 0; + + return sprintf(buf, "%s", ubd_dev->serial); +} + +static DEVICE_ATTR_RO(serial); + +static struct attribute *ubd_attrs[] = { + &dev_attr_serial.attr, + NULL, +}; + +static umode_t ubd_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + return a->mode; +} + +static const struct attribute_group ubd_attr_group = { + .attrs = ubd_attrs, + .is_visible = ubd_attrs_are_visible, +}; + +static const struct attribute_group *ubd_attr_groups[] = { + &ubd_attr_group, + NULL, +}; + static int ubd_disk_register(int major, u64 size, int unit, struct gendisk **disk_out) { @@ -897,7 +945,7 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk, NULL); + device_add_disk(parent, disk, ubd_attr_groups); *disk_out = disk; return 0; @@ -946,6 +994,7 @@ static int ubd_add(int n, char **error_out) blk_queue_write_cache(ubd_dev->queue, true, false); blk_queue_max_segments(ubd_dev->queue, MAX_SG); + blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; @@ -1192,7 +1241,7 @@ static int __init ubd_driver_init(void){ /* Letting ubd=sync be like using ubd#s= instead of ubd#= is * enough. So use anyway the io thread. */ } - stack = alloc_stack(0, 0); + stack = alloc_stack(0); io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), &thread_fd); if(io_pid < 0){ @@ -1204,7 +1253,7 @@ static int __init ubd_driver_init(void){ } err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 0, "ubd", ubd_devs); - if(err != 0) + if(err < 0) printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); return 0; } @@ -1289,37 +1338,74 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, *cow_offset += bitmap_offset; } -static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, +static void cowify_req(struct io_thread_req *req, struct io_desc *segment, + unsigned long offset, unsigned long *bitmap, __u64 bitmap_offset, __u64 bitmap_len) { - __u64 sector = req->offset >> SECTOR_SHIFT; + __u64 sector = offset >> SECTOR_SHIFT; int i; - if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT) + if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) panic("Operation too long"); if (req_op(req->req) == REQ_OP_READ) { - for (i = 0; i < req->length >> SECTOR_SHIFT; i++) { + for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ubd_set_bit(i, (unsigned char *) - &req->sector_mask); + &segment->sector_mask); } + } else { + cowify_bitmap(offset, segment->length, &segment->sector_mask, + &segment->cow_offset, bitmap, bitmap_offset, + segment->bitmap_words, bitmap_len); } - else cowify_bitmap(req->offset, req->length, &req->sector_mask, - &req->cow_offset, bitmap, bitmap_offset, - req->bitmap_words, bitmap_len); } -static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, - u64 off, struct bio_vec *bvec) +static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, + struct request *req) +{ + struct bio_vec bvec; + struct req_iterator iter; + int i = 0; + unsigned long byte_offset = io_req->offset; + int op = req_op(req); + + if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { + io_req->io_desc[0].buffer = NULL; + io_req->io_desc[0].length = blk_rq_bytes(req); + } else { + rq_for_each_segment(bvec, req, iter) { + BUG_ON(i >= io_req->desc_cnt); + + io_req->io_desc[i].buffer = + page_address(bvec.bv_page) + bvec.bv_offset; + io_req->io_desc[i].length = bvec.bv_len; + i++; + } + } + + if (dev->cow.file) { + for (i = 0; i < io_req->desc_cnt; i++) { + cowify_req(io_req, &io_req->io_desc[i], byte_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + byte_offset += io_req->io_desc[i].length; + } + + } +} + +static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, + int desc_cnt) { - struct ubd *dev = hctx->queue->queuedata; struct io_thread_req *io_req; - int ret; + int i; - io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + io_req = kmalloc(sizeof(*io_req) + + (desc_cnt * sizeof(struct io_desc)), + GFP_ATOMIC); if (!io_req) - return -ENOMEM; + return NULL; io_req->req = req; if (dev->cow.file) @@ -1327,26 +1413,41 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, else io_req->fds[0] = dev->fd; io_req->error = 0; - - if (bvec != NULL) { - io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; - io_req->length = bvec->bv_len; - } else { - io_req->buffer = NULL; - io_req->length = blk_rq_bytes(req); - } - io_req->sectorsize = SECTOR_SIZE; io_req->fds[1] = dev->fd; - io_req->cow_offset = -1; - io_req->offset = off; - io_req->sector_mask = 0; + io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - if (dev->cow.file) - cowify_req(io_req, dev->cow.bitmap, - dev->cow.bitmap_offset, dev->cow.bitmap_len); + for (i = 0 ; i < desc_cnt; i++) { + io_req->io_desc[i].sector_mask = 0; + io_req->io_desc[i].cow_offset = -1; + } + + return io_req; +} + +static int ubd_submit_request(struct ubd *dev, struct request *req) +{ + int segs = 0; + struct io_thread_req *io_req; + int ret; + int op = req_op(req); + + if (op == REQ_OP_FLUSH) + segs = 0; + else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) + segs = 1; + else + segs = blk_rq_nr_phys_segments(req); + + io_req = ubd_alloc_req(dev, req, segs); + if (!io_req) + return -ENOMEM; + + io_req->desc_cnt = segs; + if (segs) + ubd_map_req(dev, io_req, req); ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); if (ret != sizeof(io_req)) { @@ -1357,22 +1458,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, return ret; } -static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req) -{ - struct req_iterator iter; - struct bio_vec bvec; - int ret; - u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT; - - rq_for_each_segment(bvec, req, iter) { - ret = ubd_queue_one_vec(hctx, req, off, &bvec); - if (ret < 0) - return ret; - off += bvec.bv_len; - } - return 0; -} - static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1385,17 +1470,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&ubd_dev->lock); switch (req_op(req)) { - /* operations with no lentgth/offset arguments */ case REQ_OP_FLUSH: - ret = ubd_queue_one_vec(hctx, req, 0, NULL); - break; case REQ_OP_READ: case REQ_OP_WRITE: - ret = queue_rw_req(hctx, req); - break; case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL); + ret = ubd_submit_request(ubd_dev, req); break; default: WARN_ON_ONCE(1); @@ -1483,22 +1563,22 @@ static int map_error(int error_code) * will result in unpredictable behaviour and/or crashes. */ -static int update_bitmap(struct io_thread_req *req) +static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) { int n; - if(req->cow_offset == -1) + if (segment->cow_offset == -1) return map_error(0); - n = os_pwrite_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words), req->cow_offset); - if (n != sizeof(req->bitmap_words)) + n = os_pwrite_file(req->fds[1], &segment->bitmap_words, + sizeof(segment->bitmap_words), segment->cow_offset); + if (n != sizeof(segment->bitmap_words)) return map_error(-n); return map_error(0); } -static void do_io(struct io_thread_req *req) +static void do_io(struct io_thread_req *req, struct io_desc *desc) { char *buf = NULL; unsigned long len; @@ -1513,21 +1593,20 @@ static void do_io(struct io_thread_req *req) return; } - nsectors = req->length / req->sectorsize; + nsectors = desc->length / req->sectorsize; start = 0; do { - bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); end = start; while((end < nsectors) && - (ubd_test_bit(end, (unsigned char *) - &req->sector_mask) == bit)) + (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit)) end++; off = req->offset + req->offsets[bit] + start * req->sectorsize; len = (end - start) * req->sectorsize; - if (req->buffer != NULL) - buf = &req->buffer[start * req->sectorsize]; + if (desc->buffer != NULL) + buf = &desc->buffer[start * req->sectorsize]; switch (req_op(req->req)) { case REQ_OP_READ: @@ -1567,7 +1646,8 @@ static void do_io(struct io_thread_req *req) start = end; } while(start < nsectors); - req->error = update_bitmap(req); + req->offset += len; + req->error = update_bitmap(req, desc); } /* Changed in start_io_thread, which is serialized by being called only @@ -1600,8 +1680,13 @@ int io_thread(void *arg) } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { + struct io_thread_req *req = (*io_req_buffer)[count]; + int i; + io_count++; - do_io((*io_req_buffer)[count]); + for (i = 0; !req->error && i < req->desc_cnt; i++) + do_io(req, &(req->io_desc[i])); + } written = 0; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 555203e3e7b4..47a02e60898d 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1196,9 +1196,9 @@ static int vector_net_close(struct net_device *dev) /* TX tasklet */ -static void vector_tx_poll(unsigned long data) +static void vector_tx_poll(struct tasklet_struct *t) { - struct vector_private *vp = (struct vector_private *)data; + struct vector_private *vp = from_tasklet(vp, t, tx_poll); vp->estats.tx_kicks++; vector_send(vp->tx_queue); @@ -1271,7 +1271,7 @@ static int vector_net_open(struct net_device *dev) irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd, IRQ_READ, vector_rx_interrupt, IRQF_SHARED, dev->name, dev); - if (err != 0) { + if (err < 0) { netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err); err = -ENETUNREACH; goto out_close; @@ -1286,7 +1286,7 @@ static int vector_net_open(struct net_device *dev) irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd, IRQ_WRITE, vector_tx_interrupt, IRQF_SHARED, dev->name, dev); - if (err != 0) { + if (err < 0) { netdev_err(dev, "vector_open: failed to get tx irq(%d)\n", err); err = -ENETUNREACH; @@ -1629,7 +1629,7 @@ static void vector_eth_configure( }); dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); - tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp); + tasklet_setup(&vp->tx_poll, vector_tx_poll); INIT_WORK(&vp->reset_tx, vector_reset_tx); timer_setup(&vp->tl, vector_timer_expire, 0); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index a6c4bb6c2c01..27e92d3881ff 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -33,11 +33,6 @@ #include <os.h> #include "vhost_user.h" -/* Workaround due to a conflict between irq_user.h and irqreturn.h */ -#ifdef IRQ_NONE -#undef IRQ_NONE -#endif - #define MAX_SUPPORTED_QUEUE_SIZE 256 #define to_virtio_uml_device(_vdev) \ @@ -55,7 +50,7 @@ struct virtio_uml_device { struct platform_device *pdev; spinlock_t sock_lock; - int sock, req_fd; + int sock, req_fd, irq; u64 features; u64 protocol_features; u8 status; @@ -409,12 +404,14 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) return rc; vu_dev->req_fd = req_fds[0]; - rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ, + rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, vu_req_interrupt, IRQF_SHARED, vu_dev->pdev->name, vu_dev); - if (rc) + if (rc < 0) goto err_close; + vu_dev->irq = rc; + rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD, req_fds[1]); if (rc) @@ -423,7 +420,7 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) goto out; err_free_irq: - um_free_irq(VIRTIO_IRQ, vu_dev); + um_free_irq(vu_dev->irq, vu_dev); err_close: os_close_file(req_fds[0]); out: @@ -802,7 +799,11 @@ static void vu_del_vq(struct virtqueue *vq) struct virtio_uml_vq_info *info = vq->priv; if (info->call_fd >= 0) { - um_free_irq(VIRTIO_IRQ, vq); + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + um_free_irq(vu_dev->irq, vq); os_close_file(info->call_fd); } @@ -852,9 +853,9 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, return rc; info->call_fd = call_fds[0]; - rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, + rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, vu_interrupt, IRQF_SHARED, info->name, vq); - if (rc) + if (rc < 0) goto close_both; rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); @@ -864,7 +865,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, goto out; release_irq: - um_free_irq(VIRTIO_IRQ, vq); + um_free_irq(vu_dev->irq, vq); close_both: os_close_file(call_fds[0]); out: @@ -969,7 +970,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, error_setup: if (info->call_fd >= 0) { - um_free_irq(VIRTIO_IRQ, vq); + um_free_irq(vu_dev->irq, vq); os_close_file(info->call_fd); } error_call: @@ -1078,7 +1079,7 @@ static void virtio_uml_release_dev(struct device *d) /* might not have been opened due to not negotiating the feature */ if (vu_dev->req_fd >= 0) { - um_free_irq(VIRTIO_IRQ, vu_dev); + um_free_irq(vu_dev->irq, vu_dev); os_close_file(vu_dev->req_fd); } diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index fc7f1e746703..87ca4a47cd66 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -18,6 +18,7 @@ struct xterm_chan { int pid; int helper_pid; + int chan_fd; char *title; int device; int raw; @@ -33,6 +34,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts) return NULL; *data = ((struct xterm_chan) { .pid = -1, .helper_pid = -1, + .chan_fd = -1, .device = device, .title = opts->xterm_title, .raw = opts->raw } ); @@ -149,6 +151,7 @@ static int xterm_open(int input, int output, int primary, void *d, goto out_kill; } + data->chan_fd = fd; new = xterm_fd(fd, &data->helper_pid); if (new < 0) { err = new; @@ -206,6 +209,8 @@ static void xterm_close(int fd, void *d) os_kill_process(data->helper_pid, 0); data->helper_pid = -1; + if (data->chan_fd != -1) + os_close_file(data->chan_fd); os_close_file(fd); } diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index d64ef6d0d463..50f11b7b4774 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -51,7 +51,7 @@ int xterm_fd(int socket, int *pid_out) err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt, IRQF_SHARED, "xterm", data); - if (err) { + if (err < 0) { printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " "err = %d\n", err); ret = err; diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index 42c6205e2dc4..547bff7b3a89 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -17,21 +17,20 @@ #define TELNETD_IRQ 12 #define XTERM_IRQ 13 #define RANDOM_IRQ 14 -#define VIRTIO_IRQ 15 #ifdef CONFIG_UML_NET_VECTOR -#define VECTOR_BASE_IRQ (VIRTIO_IRQ + 1) +#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1) #define VECTOR_IRQ_SPACE 8 -#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1) +#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ) #else -#define LAST_IRQ VIRTIO_IRQ +#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1) #endif -#define NR_IRQS (LAST_IRQ + 1) +#define NR_IRQS 64 #endif diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index def376194dce..39376bb63abf 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -55,12 +55,15 @@ extern unsigned long end_iomem; #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) #define __PAGE_KERNEL_EXEC \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_KERNEL_RO \ + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) /* * The i386 can't do page protection for execute, and considers that the same diff --git a/arch/um/include/asm/set_memory.h b/arch/um/include/asm/set_memory.h new file mode 100644 index 000000000000..24266c63720d --- /dev/null +++ b/arch/um/include/asm/set_memory.h @@ -0,0 +1 @@ +#include <asm-generic/set_memory.h> diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index f3b03d39a854..68e45e950137 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -28,7 +28,7 @@ struct time_travel_event { extern enum time_travel_mode time_travel_mode; -void time_travel_sleep(unsigned long long duration); +void time_travel_sleep(void); static inline void time_travel_set_event_fn(struct time_travel_event *e, @@ -60,7 +60,7 @@ struct time_travel_event { #define time_travel_mode TT_MODE_OFF -static inline void time_travel_sleep(unsigned long long duration) +static inline void time_travel_sleep(void) { } diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 4e99fe05576a..16a51a8c800f 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -40,3 +40,6 @@ DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); #ifdef CONFIG_64BIT DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); #endif +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); +#endif diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h index 7cd1a10c6244..7807de593bda 100644 --- a/arch/um/include/shared/irq_kern.h +++ b/arch/um/include/shared/irq_kern.h @@ -8,11 +8,12 @@ #include <linux/interrupt.h> #include <asm/ptrace.h> +#include "irq_user.h" -extern int um_request_irq(unsigned int irq, int fd, int type, - irq_handler_t handler, - unsigned long irqflags, const char * devname, - void *dev_id); -void um_free_irq(unsigned int irq, void *dev); -#endif +#define UM_IRQ_ALLOC -1 +int um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id); +void um_free_irq(int irq, void *dev_id); +#endif diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index 107751dce153..07239e801a5b 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -9,25 +9,12 @@ #include <sysdep/ptrace.h> #include <stdbool.h> -struct irq_fd { - struct irq_fd *next; - void *id; - int fd; - int type; - int irq; - int events; - bool active; - bool pending; - bool purge; +enum um_irq_type { + IRQ_READ, + IRQ_WRITE, + NUM_IRQ_TYPES, }; -#define IRQ_READ 0 -#define IRQ_WRITE 1 -#define IRQ_NONE 2 -#define MAX_IRQ_TYPE (IRQ_NONE + 1) - - - struct siginfo; extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern void free_irq_by_fd(int fd); diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index ccafb62e8cce..d8c279e3312f 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -19,7 +19,7 @@ extern int kmalloc_ok; #define UML_ROUND_UP(addr) \ ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK) -extern unsigned long alloc_stack(int order, int atomic); +extern unsigned long alloc_stack(int atomic); extern void free_stack(unsigned long stack, int order); struct pt_regs; @@ -39,6 +39,8 @@ extern int is_syscall(unsigned long addr); extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void uml_pm_wake(void); + extern int start_uml(void); extern void paging_init(void); @@ -66,5 +68,6 @@ extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs); extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern void fatal_sigsegv(void) __attribute__ ((noreturn)); +void um_idle_sleep(void); #endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index f467d28fc0b4..13d86f94cf0f 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -233,6 +233,7 @@ extern void timer_set_signal_handler(void); extern void set_sigstack(void *sig_stack, int size); extern void remove_sigstack(void); extern void set_handler(int sig); +extern void send_sigio_to_self(void); extern int change_sig(int signal, int on); extern void block_signals(void); extern void unblock_signals(void); @@ -241,6 +242,7 @@ extern int set_signals(int enable); extern int set_signals_trace(int enable); extern int os_is_signal_stack(void); extern void deliver_alarm(void); +extern void register_pm_wake_signal(void); /* util.c */ extern void stack_protections(unsigned long address); @@ -256,7 +258,7 @@ extern void os_warn(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); /* time.c */ -extern void os_idle_sleep(unsigned long long nsecs); +extern void os_idle_sleep(void); extern int os_timer_create(void); extern int os_timer_set_interval(unsigned long long nsecs); extern int os_timer_one_shot(unsigned long long nsecs); @@ -299,19 +301,29 @@ extern void reboot_skas(void); extern int os_waiting_for_events_epoll(void); extern void *os_epoll_get_data_pointer(int index); extern int os_epoll_triggered(int index, int events); -extern int os_event_mask(int irq_type); +extern int os_event_mask(enum um_irq_type irq_type); extern int os_setup_epoll(void); extern int os_add_epoll_fd(int events, int fd, void *data); extern int os_mod_epoll_fd(int events, int fd, void *data); extern int os_del_epoll_fd(int fd); extern void os_set_ioignore(void); extern void os_close_epoll_fd(void); +extern void um_irqs_suspend(void); +extern void um_irqs_resume(void); /* sigio.c */ extern int add_sigio_fd(int fd); extern int ignore_sigio_fd(int fd); -extern void maybe_sigio_broken(int fd, int read); -extern void sigio_broken(int fd, int read); +extern void maybe_sigio_broken(int fd); +extern void sigio_broken(int fd); +/* + * unlocked versions for IRQ controller code. + * + * This is safe because it's used at suspend/resume and nothing + * else is running. + */ +extern int __add_sigio_fd(int fd); +extern int __ignore_sigio_fd(int fd); /* prctl.c */ extern int os_arch_prctl(int pid, int option, unsigned long *arg2); @@ -330,4 +342,7 @@ extern void unblock_signals_trace(void); extern void um_trace_signals_on(void); extern void um_trace_signals_off(void); +/* time-travel */ +extern void deliver_time_travel_irqs(void); + #endif diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 3577118bb4a5..3741d2380060 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -19,27 +19,40 @@ #include <kern_util.h> #include <os.h> #include <irq_user.h> +#include <irq_kern.h> +#include <as-layout.h> extern void free_irqs(void); /* When epoll triggers we do not know why it did so * we can also have different IRQs for read and write. - * This is why we keep a small irq_fd array for each fd - + * This is why we keep a small irq_reg array for each fd - * one entry per IRQ type */ +struct irq_reg { + void *id; + int irq; + /* it's cheaper to store this than to query it */ + int events; + bool active; + bool pending; + bool wakeup; +}; struct irq_entry { - struct irq_entry *next; + struct list_head list; int fd; - struct irq_fd *irq_array[MAX_IRQ_TYPE + 1]; + struct irq_reg reg[NUM_IRQ_TYPES]; + bool suspended; + bool sigio_workaround; }; -static struct irq_entry *active_fds; - static DEFINE_SPINLOCK(irq_lock); +static LIST_HEAD(active_fds); +static DECLARE_BITMAP(irqs_allocated, NR_IRQS); -static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs) +static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) { /* * irq->active guards against reentry @@ -49,23 +62,27 @@ static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs) */ if (irq->active) { irq->active = false; + do { irq->pending = false; do_IRQ(irq->irq, regs); - } while (irq->pending && (!irq->purge)); - if (!irq->purge) - irq->active = true; + } while (irq->pending); + + irq->active = true; } else { irq->pending = true; } } +void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + /* nothing */ +} + void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { struct irq_entry *irq_entry; - struct irq_fd *irq; - - int n, i, j; + int n, i; while (1) { /* This is now lockless - epoll keeps back-referencesto the irqs @@ -84,21 +101,18 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) } for (i = 0; i < n ; i++) { - /* Epoll back reference is the entry with 3 irq_fd - * leaves - one for each irq type. - */ - irq_entry = (struct irq_entry *) - os_epoll_get_data_pointer(i); - for (j = 0; j < MAX_IRQ_TYPE ; j++) { - irq = irq_entry->irq_array[j]; - if (irq == NULL) + enum um_irq_type t; + + irq_entry = os_epoll_get_data_pointer(i); + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + int events = irq_entry->reg[t].events; + + if (!events) continue; - if (os_epoll_triggered(i, irq->events) > 0) - irq_io_loop(irq, regs); - if (irq->purge) { - irq_entry->irq_array[j] = NULL; - kfree(irq); - } + + if (os_epoll_triggered(i, events) > 0) + irq_io_loop(&irq_entry->reg[t], regs); } } } @@ -106,32 +120,59 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) free_irqs(); } -static int assign_epoll_events_to_irq(struct irq_entry *irq_entry) +static struct irq_entry *get_irq_entry_by_fd(int fd) { - int i; - int events = 0; - struct irq_fd *irq; + struct irq_entry *walk; - for (i = 0; i < MAX_IRQ_TYPE ; i++) { - irq = irq_entry->irq_array[i]; - if (irq != NULL) - events = irq->events | events; - } - if (events > 0) { - /* os_add_epoll will call os_mod_epoll if this already exists */ - return os_add_epoll_fd(events, irq_entry->fd, irq_entry); + lockdep_assert_held(&irq_lock); + + list_for_each_entry(walk, &active_fds, list) { + if (walk->fd == fd) + return walk; } - /* No events - delete */ - return os_del_epoll_fd(irq_entry->fd); + + return NULL; } +static void free_irq_entry(struct irq_entry *to_free, bool remove) +{ + if (!to_free) + return; + + if (remove) + os_del_epoll_fd(to_free->fd); + list_del(&to_free->list); + kfree(to_free); +} + +static bool update_irq_entry(struct irq_entry *entry) +{ + enum um_irq_type i; + int events = 0; + + for (i = 0; i < NUM_IRQ_TYPES; i++) + events |= entry->reg[i].events; + + if (events) { + /* will modify (instead of add) if needed */ + os_add_epoll_fd(events, entry->fd, entry); + return true; + } + + os_del_epoll_fd(entry->fd); + return false; +} +static void update_or_free_irq_entry(struct irq_entry *entry) +{ + if (!update_irq_entry(entry)) + free_irq_entry(entry, false); +} -static int activate_fd(int irq, int fd, int type, void *dev_id) +static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id) { - struct irq_fd *new_fd; struct irq_entry *irq_entry; - int i, err, events; + int err, events = os_event_mask(type); unsigned long flags; err = os_set_fd_async(fd); @@ -139,73 +180,34 @@ static int activate_fd(int irq, int fd, int type, void *dev_id) goto out; spin_lock_irqsave(&irq_lock, flags); - - /* Check if we have an entry for this fd */ - - err = -EBUSY; - for (irq_entry = active_fds; - irq_entry != NULL; irq_entry = irq_entry->next) { - if (irq_entry->fd == fd) - break; - } - - if (irq_entry == NULL) { - /* This needs to be atomic as it may be called from an - * IRQ context. - */ - irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC); - if (irq_entry == NULL) { - printk(KERN_ERR - "Failed to allocate new IRQ entry\n"); + irq_entry = get_irq_entry_by_fd(fd); + if (irq_entry) { + /* cannot register the same FD twice with the same type */ + if (WARN_ON(irq_entry->reg[type].events)) { + err = -EALREADY; goto out_unlock; } - irq_entry->fd = fd; - for (i = 0; i < MAX_IRQ_TYPE; i++) - irq_entry->irq_array[i] = NULL; - irq_entry->next = active_fds; - active_fds = irq_entry; - } - - /* Check if we are trying to re-register an interrupt for a - * particular fd - */ - if (irq_entry->irq_array[type] != NULL) { - printk(KERN_ERR - "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n", - irq, fd, type, dev_id - ); - goto out_unlock; + /* temporarily disable to avoid IRQ-side locking */ + os_del_epoll_fd(fd); } else { - /* New entry for this fd */ - - err = -ENOMEM; - new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC); - if (new_fd == NULL) + irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC); + if (!irq_entry) { + err = -ENOMEM; goto out_unlock; - - events = os_event_mask(type); - - *new_fd = ((struct irq_fd) { - .id = dev_id, - .irq = irq, - .type = type, - .events = events, - .active = true, - .pending = false, - .purge = false - }); - /* Turn off any IO on this fd - allows us to - * avoid locking the IRQ loop - */ - os_del_epoll_fd(irq_entry->fd); - irq_entry->irq_array[type] = new_fd; + } + irq_entry->fd = fd; + list_add_tail(&irq_entry->list, &active_fds); + maybe_sigio_broken(fd); } - /* Turn back IO on with the correct (new) IO event mask */ - assign_epoll_events_to_irq(irq_entry); + irq_entry->reg[type].id = dev_id; + irq_entry->reg[type].irq = irq; + irq_entry->reg[type].active = true; + irq_entry->reg[type].events = events; + + WARN_ON(!update_irq_entry(irq_entry)); spin_unlock_irqrestore(&irq_lock, flags); - maybe_sigio_broken(fd, (type != IRQ_NONE)); return 0; out_unlock: @@ -215,104 +217,10 @@ out: } /* - * Walk the IRQ list and dispose of any unused entries. - * Should be done under irq_lock. + * Remove the entry or entries for a specific FD, if you + * don't want to remove all the possible entries then use + * um_free_irq() or deactivate_fd() instead. */ - -static void garbage_collect_irq_entries(void) -{ - int i; - bool reap; - struct irq_entry *walk; - struct irq_entry *previous = NULL; - struct irq_entry *to_free; - - if (active_fds == NULL) - return; - walk = active_fds; - while (walk != NULL) { - reap = true; - for (i = 0; i < MAX_IRQ_TYPE ; i++) { - if (walk->irq_array[i] != NULL) { - reap = false; - break; - } - } - if (reap) { - if (previous == NULL) - active_fds = walk->next; - else - previous->next = walk->next; - to_free = walk; - } else { - to_free = NULL; - } - walk = walk->next; - kfree(to_free); - } -} - -/* - * Walk the IRQ list and get the descriptor for our FD - */ - -static struct irq_entry *get_irq_entry_by_fd(int fd) -{ - struct irq_entry *walk = active_fds; - - while (walk != NULL) { - if (walk->fd == fd) - return walk; - walk = walk->next; - } - return NULL; -} - - -/* - * Walk the IRQ list and dispose of an entry for a specific - * device, fd and number. Note - if sharing an IRQ for read - * and writefor the same FD it will be disposed in either case. - * If this behaviour is undesirable use different IRQ ids. - */ - -#define IGNORE_IRQ 1 -#define IGNORE_DEV (1<<1) - -static void do_free_by_irq_and_dev( - struct irq_entry *irq_entry, - unsigned int irq, - void *dev, - int flags -) -{ - int i; - struct irq_fd *to_free; - - for (i = 0; i < MAX_IRQ_TYPE ; i++) { - if (irq_entry->irq_array[i] != NULL) { - if ( - ((flags & IGNORE_IRQ) || - (irq_entry->irq_array[i]->irq == irq)) && - ((flags & IGNORE_DEV) || - (irq_entry->irq_array[i]->id == dev)) - ) { - /* Turn off any IO on this fd - allows us to - * avoid locking the IRQ loop - */ - os_del_epoll_fd(irq_entry->fd); - to_free = irq_entry->irq_array[i]; - irq_entry->irq_array[i] = NULL; - assign_epoll_events_to_irq(irq_entry); - if (to_free->active) - to_free->purge = true; - else - kfree(to_free); - } - } - } -} - void free_irq_by_fd(int fd) { struct irq_entry *to_free; @@ -320,58 +228,64 @@ void free_irq_by_fd(int fd) spin_lock_irqsave(&irq_lock, flags); to_free = get_irq_entry_by_fd(fd); - if (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - -1, - NULL, - IGNORE_IRQ | IGNORE_DEV - ); - } - garbage_collect_irq_entries(); + free_irq_entry(to_free, true); spin_unlock_irqrestore(&irq_lock, flags); } EXPORT_SYMBOL(free_irq_by_fd); static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { - struct irq_entry *to_free; + struct irq_entry *entry; unsigned long flags; spin_lock_irqsave(&irq_lock, flags); - to_free = active_fds; - while (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - irq, - dev, - 0 - ); - to_free = to_free->next; + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type i; + + for (i = 0; i < NUM_IRQ_TYPES; i++) { + struct irq_reg *reg = &entry->reg[i]; + + if (!reg->events) + continue; + if (reg->irq != irq) + continue; + if (reg->id != dev) + continue; + + os_del_epoll_fd(entry->fd); + reg->events = 0; + update_or_free_irq_entry(entry); + goto out; + } } - garbage_collect_irq_entries(); +out: spin_unlock_irqrestore(&irq_lock, flags); } - void deactivate_fd(int fd, int irqnum) { - struct irq_entry *to_free; + struct irq_entry *entry; unsigned long flags; + enum um_irq_type i; os_del_epoll_fd(fd); + spin_lock_irqsave(&irq_lock, flags); - to_free = get_irq_entry_by_fd(fd); - if (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - irqnum, - NULL, - IGNORE_DEV - ); + entry = get_irq_entry_by_fd(fd); + if (!entry) + goto out; + + for (i = 0; i < NUM_IRQ_TYPES; i++) { + if (!entry->reg[i].events) + continue; + if (entry->reg[i].irq == irqnum) + entry->reg[i].events = 0; } - garbage_collect_irq_entries(); + + update_or_free_irq_entry(entry); +out: spin_unlock_irqrestore(&irq_lock, flags); + ignore_sigio_fd(fd); } EXPORT_SYMBOL(deactivate_fd); @@ -384,24 +298,17 @@ EXPORT_SYMBOL(deactivate_fd); */ int deactivate_all_fds(void) { - struct irq_entry *to_free; + struct irq_entry *entry; /* Stop IO. The IRQ loop has no lock so this is our * only way of making sure we are safe to dispose * of all IRQ handlers */ os_set_ioignore(); - to_free = active_fds; - while (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - -1, - NULL, - IGNORE_IRQ | IGNORE_DEV - ); - to_free = to_free->next; - } - /* don't garbage collect - we can no longer call kfree() here */ + + /* we can no longer call kfree() here so just deactivate */ + list_for_each_entry(entry, &active_fds, list) + os_del_epoll_fd(entry->fd); os_close_epoll_fd(); return 0; } @@ -421,31 +328,146 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs) return 1; } -void um_free_irq(unsigned int irq, void *dev) +void um_free_irq(int irq, void *dev) { + if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq)) + return; + free_irq_by_irq_and_dev(irq, dev); free_irq(irq, dev); + clear_bit(irq, irqs_allocated); } EXPORT_SYMBOL(um_free_irq); -int um_request_irq(unsigned int irq, int fd, int type, - irq_handler_t handler, - unsigned long irqflags, const char * devname, - void *dev_id) +int um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) { int err; + if (irq == UM_IRQ_ALLOC) { + int i; + + for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) { + if (!test_and_set_bit(i, irqs_allocated)) { + irq = i; + break; + } + } + } + + if (irq < 0) + return -ENOSPC; + if (fd != -1) { err = activate_fd(irq, fd, type, dev_id); if (err) - return err; + goto error; } - return request_irq(irq, handler, irqflags, devname, dev_id); -} + err = request_irq(irq, handler, irqflags, devname, dev_id); + if (err < 0) + goto error; + return irq; +error: + clear_bit(irq, irqs_allocated); + return err; +} EXPORT_SYMBOL(um_request_irq); +#ifdef CONFIG_PM_SLEEP +void um_irqs_suspend(void) +{ + struct irq_entry *entry; + unsigned long flags; + + sig_info[SIGIO] = sigio_handler_suspend; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + bool wake = false; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + if (!entry->reg[t].events) + continue; + + /* + * For the SIGIO_WRITE_IRQ, which is used to handle the + * SIGIO workaround thread, we need special handling: + * enable wake for it itself, but below we tell it about + * any FDs that should be suspended. + */ + if (entry->reg[t].wakeup || + entry->reg[t].irq == SIGIO_WRITE_IRQ) { + wake = true; + break; + } + } + + if (!wake) { + entry->suspended = true; + os_clear_fd_async(entry->fd); + entry->sigio_workaround = + !__ignore_sigio_fd(entry->fd); + } + } + spin_unlock_irqrestore(&irq_lock, flags); +} + +void um_irqs_resume(void) +{ + struct irq_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + if (entry->suspended) { + int err = os_set_fd_async(entry->fd); + + WARN(err < 0, "os_set_fd_async returned %d\n", err); + entry->suspended = false; + + if (entry->sigio_workaround) { + err = __add_sigio_fd(entry->fd); + WARN(err < 0, "add_sigio_returned %d\n", err); + } + } + } + spin_unlock_irqrestore(&irq_lock, flags); + + sig_info[SIGIO] = sigio_handler; + send_sigio_to_self(); +} + +static int normal_irq_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + if (!entry->reg[t].events) + continue; + + if (entry->reg[t].irq != d->irq) + continue; + entry->reg[t].wakeup = on; + goto unlock; + } + } +unlock: + spin_unlock_irqrestore(&irq_lock, flags); + return 0; +} +#else +#define normal_irq_set_wake NULL +#endif + /* * irq_chip must define at least enable/disable and ack when * the edge handler is used. @@ -454,7 +476,7 @@ static void dummy(struct irq_data *d) { } -/* This is used for everything else than the timer. */ +/* This is used for everything other than the timer. */ static struct irq_chip normal_irq_type = { .name = "SIGIO", .irq_disable = dummy, @@ -462,10 +484,11 @@ static struct irq_chip normal_irq_type = { .irq_ack = dummy, .irq_mask = dummy, .irq_unmask = dummy, + .irq_set_wake = normal_irq_set_wake, }; -static struct irq_chip SIGVTALRM_irq_type = { - .name = "SIGVTALRM", +static struct irq_chip alarm_irq_type = { + .name = "SIGALRM", .irq_disable = dummy, .irq_enable = dummy, .irq_ack = dummy, @@ -477,10 +500,9 @@ void __init init_IRQ(void) { int i; - irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); - + irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); - for (i = 1; i <= LAST_IRQ; i++) + for (i = 1; i < NR_IRQS; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); /* Initialize EPOLL Loop */ os_setup_epoll(); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 0fcdc374a9a1..2a986ece5478 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,6 +32,7 @@ #include <os.h> #include <skas.h> #include <linux/time-internal.h> +#include <asm/set_memory.h> /* * This is a per-cpu array. A processor only modifies its entry and it only @@ -62,16 +63,18 @@ void free_stack(unsigned long stack, int order) free_pages(stack, order); } -unsigned long alloc_stack(int order, int atomic) +unsigned long alloc_stack(int atomic) { - unsigned long page; + unsigned long addr; gfp_t flags = GFP_KERNEL; if (atomic) flags = GFP_ATOMIC; - page = __get_free_pages(flags, order); + addr = __get_free_pages(flags, 1); - return page; + set_memory_ro(addr, 1); + + return addr + PAGE_SIZE; } static inline void set_current(struct task_struct *task) @@ -203,15 +206,12 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } -static void um_idle_sleep(void) +void um_idle_sleep(void) { - unsigned long long duration = UM_NSEC_PER_SEC; - - if (time_travel_mode != TT_MODE_OFF) { - time_travel_sleep(duration); - } else { - os_idle_sleep(duration); - } + if (time_travel_mode != TT_MODE_OFF) + time_travel_sleep(); + else + os_idle_sleep(); } void arch_cpu_idle(void) diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index d1cffc2a7f21..5085a50c3b8c 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -25,7 +25,7 @@ int write_sigio_irq(int fd) err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, 0, "write sigio", NULL); - if (err) { + if (err < 0) { printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " "err = %d\n", err); return -1; diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 3d109ff3309b..f4db89b5b5a6 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -31,6 +31,7 @@ static bool time_travel_start_set; static unsigned long long time_travel_start; static unsigned long long time_travel_time; static LIST_HEAD(time_travel_events); +static LIST_HEAD(time_travel_irqs); static unsigned long long time_travel_timer_interval; static unsigned long long time_travel_next_event; static struct time_travel_event time_travel_timer_event; @@ -46,6 +47,9 @@ static void time_travel_set_time(unsigned long long ns) if (unlikely(ns < time_travel_time)) panic("time-travel: time goes backwards %lld -> %lld\n", time_travel_time, ns); + else if (unlikely(ns >= S64_MAX)) + panic("The system was going to sleep forever, aborting"); + time_travel_time = ns; } @@ -180,6 +184,14 @@ static void time_travel_ext_update_request(unsigned long long time) time == time_travel_ext_prev_request) return; + /* + * if we're running and are allowed to run past the request + * then we don't need to update it either + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && + time < time_travel_ext_free_until) + return; + time_travel_ext_prev_request = time; time_travel_ext_prev_request_valid = true; time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); @@ -187,7 +199,13 @@ static void time_travel_ext_update_request(unsigned long long time) void __time_travel_propagate_time(void) { + static unsigned long long last_propagated; + + if (last_propagated == time_travel_time) + return; + time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time); + last_propagated = time_travel_time; } EXPORT_SYMBOL_GPL(__time_travel_propagate_time); @@ -214,6 +232,7 @@ static void time_travel_ext_wait(bool idle) }; time_travel_ext_prev_request_valid = false; + time_travel_ext_free_until_valid = false; time_travel_ext_waiting++; time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); @@ -260,11 +279,6 @@ static void __time_travel_add_event(struct time_travel_event *e, struct time_travel_event *tmp; bool inserted = false; - if (WARN(time_travel_mode == TT_MODE_BASIC && - e != &time_travel_timer_event, - "only timer events can be handled in basic mode")) - return; - if (e->pending) return; @@ -311,6 +325,35 @@ void time_travel_periodic_timer(struct time_travel_event *e) deliver_alarm(); } +void deliver_time_travel_irqs(void) +{ + struct time_travel_event *e; + unsigned long flags; + + /* + * Don't do anything for most cases. Note that because here we have + * to disable IRQs (and re-enable later) we'll actually recurse at + * the end of the function, so this is strictly necessary. + */ + if (likely(list_empty(&time_travel_irqs))) + return; + + local_irq_save(flags); + irq_enter(); + while ((e = list_first_entry_or_null(&time_travel_irqs, + struct time_travel_event, + list))) { + WARN(e->time != time_travel_time, + "time moved from %lld to %lld before IRQ delivery\n", + time_travel_time, e->time); + list_del(&e->list); + e->pending = false; + e->fn(e); + } + irq_exit(); + local_irq_restore(flags); +} + static void time_travel_deliver_event(struct time_travel_event *e) { if (e == &time_travel_timer_event) { @@ -319,6 +362,14 @@ static void time_travel_deliver_event(struct time_travel_event *e) * by itself, so must handle it specially here */ e->fn(e); + } else if (irqs_disabled()) { + list_add_tail(&e->list, &time_travel_irqs); + /* + * set pending again, it was set to false when the + * event was deleted from the original list, but + * now it's still pending until we deliver the IRQ. + */ + e->pending = true; } else { unsigned long flags; @@ -404,9 +455,14 @@ static void time_travel_oneshot_timer(struct time_travel_event *e) deliver_alarm(); } -void time_travel_sleep(unsigned long long duration) +void time_travel_sleep(void) { - unsigned long long next = time_travel_time + duration; + /* + * Wait "forever" (using S64_MAX because there are some potential + * wrapping issues, especially with the current TT_MODE_EXTERNAL + * controller application. + */ + unsigned long long next = S64_MAX; if (time_travel_mode == TT_MODE_BASIC) os_timer_disable(); @@ -483,6 +539,7 @@ invalid_number: #define time_travel_start_set 0 #define time_travel_start 0 #define time_travel_time 0 +#define time_travel_ext_waiting 0 static inline void time_travel_update_time(unsigned long long ns, bool retearly) { @@ -628,7 +685,8 @@ static u64 timer_read(struct clocksource *cs) * "what do I do next" and onstack event we use to know when * to return from time_travel_update_time(). */ - if (!irqs_disabled() && !in_interrupt() && !in_softirq()) + if (!irqs_disabled() && !in_interrupt() && !in_softirq() && + !time_travel_ext_waiting) time_travel_update_time(time_travel_time + TIMER_MULTIPLIER, false); @@ -673,10 +731,8 @@ void read_persistent_clock64(struct timespec64 *ts) { long long nsecs; - if (time_travel_start_set) + if (time_travel_mode != TT_MODE_OFF) nsecs = time_travel_start + time_travel_time; - else if (time_travel_mode == TT_MODE_EXTERNAL) - nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); else nsecs = os_persistent_clock_emulation(); @@ -686,6 +742,25 @@ void read_persistent_clock64(struct timespec64 *ts) void __init time_init(void) { +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + switch (time_travel_mode) { + case TT_MODE_EXTERNAL: + time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); + /* controller gave us the *current* time, so adjust by that */ + time_travel_ext_get_time(); + time_travel_start -= time_travel_time; + break; + case TT_MODE_INFCPU: + case TT_MODE_BASIC: + if (!time_travel_start_set) + time_travel_start = os_persistent_clock_emulation(); + break; + case TT_MODE_OFF: + /* we just read the host clock with os_persistent_clock_emulation() */ + break; + } +#endif + timer_set_signal_handler(); late_time_init = um_timer_setup; } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 61776790cd67..437d1f1cc5ec 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -608,3 +608,57 @@ void force_flush_all(void) vma = vma->vm_next; } } + +struct page_change_data { + unsigned int set_mask, clear_mask; +}; + +static int change_page_range(pte_t *ptep, unsigned long addr, void *data) +{ + struct page_change_data *cdata = data; + pte_t pte = READ_ONCE(*ptep); + + pte_clear_bits(pte, cdata->clear_mask); + pte_set_bits(pte, cdata->set_mask); + + set_pte(ptep, pte); + return 0; +} + +static int change_memory(unsigned long start, unsigned long pages, + unsigned int set_mask, unsigned int clear_mask) +{ + unsigned long size = pages * PAGE_SIZE; + struct page_change_data data; + int ret; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, start + size); + + return ret; +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory(addr, numpages, 0, _PAGE_RW); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory(addr, numpages, _PAGE_RW, 0); +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + return -EOPNOTSUPP; +} + +int set_memory_x(unsigned long addr, int numpages) +{ + return -EOPNOTSUPP; +} diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 76b37297b7d4..31d356b1ffd8 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -13,6 +13,7 @@ #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/kmsg_dump.h> +#include <linux/suspend.h> #include <asm/processor.h> #include <asm/sections.h> @@ -377,3 +378,69 @@ void *text_poke(void *addr, const void *opcode, size_t len) void text_poke_sync(void) { } + +void uml_pm_wake(void) +{ + pm_system_wakeup(); +} + +#ifdef CONFIG_PM_SLEEP +static int um_suspend_valid(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} + +static int um_suspend_prepare(void) +{ + um_irqs_suspend(); + return 0; +} + +static int um_suspend_enter(suspend_state_t state) +{ + if (WARN_ON(state != PM_SUSPEND_MEM)) + return -EINVAL; + + /* + * This is identical to the idle sleep, but we've just + * (during suspend) turned off all interrupt sources + * except for the ones we want, so now we can only wake + * up on something we actually want to wake up on. All + * timing has also been suspended. + */ + um_idle_sleep(); + return 0; +} + +static void um_suspend_finish(void) +{ + um_irqs_resume(); +} + +const struct platform_suspend_ops um_suspend_ops = { + .valid = um_suspend_valid, + .prepare = um_suspend_prepare, + .enter = um_suspend_enter, + .finish = um_suspend_finish, +}; + +static int init_pm_wake_signal(void) +{ + /* + * In external time-travel mode we can't use signals to wake up + * since that would mess with the scheduling. We'll have to do + * some additional work to support wakeup on virtio devices or + * similar, perhaps implementing a fake RTC controller that can + * trigger wakeup (and request the appropriate scheduling from + * the external scheduler when going to suspend.) + */ + if (time_travel_mode != TT_MODE_EXTERNAL) + register_pm_wake_signal(); + + suspend_set_ops(&um_suspend_ops); + + return 0; +} + +late_initcall(init_pm_wake_signal); +#endif diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 839915b8c31c..77ac50baa3f8 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -10,6 +10,8 @@ obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o \ umid.o user_syms.o util.o drivers/ skas/ +CFLAGS_signal.o += -Wframe-larger-than=4096 + obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 9fa6e4187d4f..feb48d796e00 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -45,7 +45,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) unsigned long stack, sp; int pid, fds[2], ret, n; - stack = alloc_stack(0, __cant_sleep()); + stack = alloc_stack(__cant_sleep()); if (stack == 0) return -ENOMEM; @@ -116,7 +116,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long stack, sp; int pid, status, err; - stack = alloc_stack(0, __cant_sleep()); + stack = alloc_stack(__cant_sleep()); if (stack == 0) return -ENOMEM; diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index d508310ee5e1..98ea910ef87c 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -45,10 +45,10 @@ int os_epoll_triggered(int index, int events) * access to the right includes/defines for EPOLL constants. */ -int os_event_mask(int irq_type) +int os_event_mask(enum um_irq_type irq_type) { if (irq_type == IRQ_READ) - return EPOLLIN | EPOLLPRI; + return EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP | EPOLLRDHUP; if (irq_type == IRQ_WRITE) return EPOLLOUT; return 0; diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 75558080d0bf..6597ea1986ff 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -164,45 +164,55 @@ static void update_thread(void) set_signals_trace(flags); } -int add_sigio_fd(int fd) +int __add_sigio_fd(int fd) { struct pollfd *p; - int err = 0, i, n; + int err, i, n; - sigio_lock(); for (i = 0; i < all_sigio_fds.used; i++) { if (all_sigio_fds.poll[i].fd == fd) break; } if (i == all_sigio_fds.used) - goto out; + return -ENOSPC; p = &all_sigio_fds.poll[i]; for (i = 0; i < current_poll.used; i++) { if (current_poll.poll[i].fd == fd) - goto out; + return 0; } n = current_poll.used; err = need_poll(&next_poll, n + 1); if (err) - goto out; + return err; memcpy(next_poll.poll, current_poll.poll, current_poll.used * sizeof(struct pollfd)); next_poll.poll[n] = *p; next_poll.used = n + 1; update_thread(); - out: + + return 0; +} + + +int add_sigio_fd(int fd) +{ + int err; + + sigio_lock(); + err = __add_sigio_fd(fd); sigio_unlock(); + return err; } -int ignore_sigio_fd(int fd) +int __ignore_sigio_fd(int fd) { struct pollfd *p; - int err = 0, i, n = 0; + int err, i, n = 0; /* * This is called from exitcalls elsewhere in UML - if @@ -212,17 +222,16 @@ int ignore_sigio_fd(int fd) if (write_sigio_pid == -1) return -EIO; - sigio_lock(); for (i = 0; i < current_poll.used; i++) { if (current_poll.poll[i].fd == fd) break; } if (i == current_poll.used) - goto out; + return -ENOENT; err = need_poll(&next_poll, current_poll.used - 1); if (err) - goto out; + return err; for (i = 0; i < current_poll.used; i++) { p = ¤t_poll.poll[i]; @@ -232,8 +241,18 @@ int ignore_sigio_fd(int fd) next_poll.used = current_poll.used - 1; update_thread(); - out: + + return 0; +} + +int ignore_sigio_fd(int fd) +{ + int err; + + sigio_lock(); + err = __ignore_sigio_fd(fd); sigio_unlock(); + return err; } @@ -336,7 +355,7 @@ out_close1: close(l_write_sigio_fds[1]); } -void sigio_broken(int fd, int read) +void sigio_broken(int fd) { int err; @@ -352,7 +371,7 @@ void sigio_broken(int fd, int read) all_sigio_fds.poll[all_sigio_fds.used++] = ((struct pollfd) { .fd = fd, - .events = read ? POLLIN : POLLOUT, + .events = POLLIN, .revents = 0 }); out: sigio_unlock(); @@ -360,17 +379,16 @@ out: /* Changed during early boot */ static int pty_output_sigio; -static int pty_close_sigio; -void maybe_sigio_broken(int fd, int read) +void maybe_sigio_broken(int fd) { if (!isatty(fd)) return; - if ((read || pty_output_sigio) && (!read || pty_close_sigio)) + if (pty_output_sigio) return; - sigio_broken(fd, read); + sigio_broken(fd); } static void sigio_cleanup(void) @@ -514,19 +532,6 @@ static void tty_output(int master, int slave) printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n); } -static void tty_close(int master, int slave) -{ - printk(UM_KERN_INFO "Checking that host ptys support SIGIO on " - "close..."); - - close(slave); - if (got_sigio) { - printk(UM_KERN_CONT "Yes\n"); - pty_close_sigio = 1; - } else - printk(UM_KERN_CONT "No, enabling workaround\n"); -} - static void __init check_sigio(void) { if ((access("/dev/ptmx", R_OK) < 0) && @@ -536,7 +541,6 @@ static void __init check_sigio(void) return; } check_one_sigio(tty_output); - check_one_sigio(tty_close); } /* Here because it only does the SIGIO testing for now */ diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index b58bc68cbe64..96f511d1aabe 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -136,6 +136,16 @@ void set_sigstack(void *sig_stack, int size) panic("enabling signal stack failed, errno = %d\n", errno); } +static void sigusr1_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) +{ + uml_pm_wake(); +} + +void register_pm_wake_signal(void) +{ + set_handler(SIGUSR1); +} + static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGSEGV] = sig_handler, [SIGBUS] = sig_handler, @@ -145,7 +155,9 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGIO] = sig_handler, [SIGWINCH] = sig_handler, - [SIGALRM] = timer_alarm_handler + [SIGALRM] = timer_alarm_handler, + + [SIGUSR1] = sigusr1_handler, }; static void hard_handler(int sig, siginfo_t *si, void *p) @@ -222,6 +234,11 @@ void set_handler(int sig) panic("sigprocmask failed - errno = %d\n", errno); } +void send_sigio_to_self(void) +{ + kill(os_getpid(), SIGIO); +} + int change_sig(int signal, int on) { sigset_t sigset; @@ -254,6 +271,9 @@ void unblock_signals(void) return; signals_enabled = 1; +#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT + deliver_time_travel_irqs(); +#endif /* * We loop because the IRQ handler returns with interrupts off. So, diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 4fb877b99dde..0621d521208e 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -400,7 +400,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) if (WIFSTOPPED(status)) { int sig = WSTOPSIG(status); - ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); + /* These signal handlers need the si argument. + * The SIGIO and SIGALARM handlers which constitute the + * majority of invocations, do not use it. + */ + switch (sig) { + case SIGSEGV: + case SIGTRAP: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); + break; + } switch (sig) { case SIGSEGV: diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 90f6de224c70..a61cbf73a179 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -7,6 +7,7 @@ */ #include <stddef.h> +#include <unistd.h> #include <errno.h> #include <signal.h> #include <time.h> @@ -99,19 +100,9 @@ long long os_nsecs(void) } /** - * os_idle_sleep() - sleep for a given time of nsecs - * @nsecs: nanoseconds to sleep + * os_idle_sleep() - sleep until interrupted */ -void os_idle_sleep(unsigned long long nsecs) +void os_idle_sleep(void) { - struct timespec ts = { - .tv_sec = nsecs / UM_NSEC_PER_SEC, - .tv_nsec = nsecs % UM_NSEC_PER_SEC - }; - - /* - * Relay the signal if clock_nanosleep is interrupted. - */ - if (clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL)) - deliver_alarm(); + pause(); } diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 1d7558dac75f..a3dd61521d24 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -137,20 +137,13 @@ static inline int is_umdir_used(char *dir) { char pid[sizeof("nnnnnnnnn")], *end, *file; int dead, fd, p, n, err; - size_t filelen; + size_t filelen = strlen(dir) + sizeof("/pid") + 1; - err = asprintf(&file, "%s/pid", dir); - if (err < 0) - return 0; - - filelen = strlen(file); + file = malloc(filelen); + if (!file) + return -ENOMEM; - n = snprintf(file, filelen, "%s/pid", dir); - if (n >= filelen) { - printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n"); - err = -E2BIG; - goto out; - } + snprintf(file, filelen, "%s/pid", dir); dead = 0; fd = open(file, O_RDONLY); |