diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-02 13:20:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-02 13:20:11 -0700 |
commit | 9ae5fceb9a20154d74586fe17d1096b981b23e34 (patch) | |
tree | f11664ab46a7b6b4b9c07f41f5b509500705d1c8 /drivers | |
parent | a2d616b935a0df24bc9375785b19bf30d7fc9c6a (diff) | |
parent | 58e636039b512697554b579c2bb23774061877f5 (diff) |
Merge tag 'for-linus-5.15-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
- some small cleanups
- a fix for a bug when running as Xen PV guest which could result in
not all memory being transferred in case of a migration of the guest
- a small series for getting rid of code for supporting very old Xen
hypervisor versions nobody should be using since many years now
- a series for hardening the Xen block frontend driver
- a fix for Xen PV boot code issuing warning messages due to a stray
preempt_disable() on the non-boot processors
* tag 'for-linus-5.15-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen: remove stray preempt_disable() from PV AP startup code
xen/pcifront: Removed unnecessary __ref annotation
x86: xen: platform-pci-unplug: use pr_err() and pr_warn() instead of raw printk()
drivers/xen/xenbus/xenbus_client.c: fix bugon.cocci warnings
xen/blkfront: don't trust the backend response data blindly
xen/blkfront: don't take local copy of a request from the ring page
xen/blkfront: read response from backend only once
xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests
xen: assume XENFEAT_mmu_pt_update_preserve_ad being set for pv guests
xen: check required Xen features
xen: fix setting of max_pfn in shared_info
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/xen-blkfront.c | 126 | ||||
-rw-r--r-- | drivers/pci/xen-pcifront.c | 30 | ||||
-rw-r--r-- | drivers/xen/features.c | 18 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 36 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 9 |
5 files changed, 126 insertions, 93 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 715bfa8aca7f..72902104f111 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -80,6 +80,7 @@ enum blkif_state { BLKIF_STATE_DISCONNECTED, BLKIF_STATE_CONNECTED, BLKIF_STATE_SUSPENDED, + BLKIF_STATE_ERROR, }; struct grant { @@ -89,6 +90,7 @@ struct grant { }; enum blk_req_status { + REQ_PROCESSING, REQ_WAITING, REQ_DONE, REQ_ERROR, @@ -530,10 +532,10 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, id = get_id_from_freelist(rinfo); rinfo->shadow[id].request = req; - rinfo->shadow[id].status = REQ_WAITING; + rinfo->shadow[id].status = REQ_PROCESSING; rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID; - (*ring_req)->u.rw.id = id; + rinfo->shadow[id].req.u.rw.id = id; return id; } @@ -541,11 +543,12 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo) { struct blkfront_info *info = rinfo->dev_info; - struct blkif_request *ring_req; + struct blkif_request *ring_req, *final_ring_req; unsigned long id; /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; ring_req->operation = BLKIF_OP_DISCARD; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); @@ -556,8 +559,9 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf else ring_req->u.discard.flag = 0; - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; + /* Copy the request to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; return 0; } @@ -690,6 +694,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri { struct blkfront_info *info = rinfo->dev_info; struct blkif_request *ring_req, *extra_ring_req = NULL; + struct blkif_request *final_ring_req, *final_extra_ring_req = NULL; unsigned long id, extra_id = NO_ASSOCIATED_ID; bool require_extra_req = false; int i; @@ -734,7 +739,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri } /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); num_grant = 0; @@ -785,7 +791,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri ring_req->u.rw.nr_segments = num_grant; if (unlikely(require_extra_req)) { extra_id = blkif_ring_get_request(rinfo, req, - &extra_ring_req); + &final_extra_ring_req); + extra_ring_req = &rinfo->shadow[extra_id].req; + /* * Only the first request contains the scatter-gather * list. @@ -827,10 +835,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri if (setup.segments) kunmap_atomic(setup.segments); - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; - if (unlikely(require_extra_req)) - rinfo->shadow[extra_id].req = *extra_ring_req; + /* Copy request(s) to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; + if (unlikely(require_extra_req)) { + *final_extra_ring_req = *extra_ring_req; + rinfo->shadow[extra_id].status = REQ_WAITING; + } if (new_persistent_gnts) gnttab_free_grant_references(setup.gref_head); @@ -1353,8 +1364,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp) static int blkif_get_final_status(enum blk_req_status s1, enum blk_req_status s2) { - BUG_ON(s1 == REQ_WAITING); - BUG_ON(s2 == REQ_WAITING); + BUG_ON(s1 < REQ_DONE); + BUG_ON(s2 < REQ_DONE); if (s1 == REQ_ERROR || s2 == REQ_ERROR) return BLKIF_RSP_ERROR; @@ -1387,7 +1398,7 @@ static bool blkif_completion(unsigned long *id, s->status = blkif_rsp_to_req_status(bret->status); /* Wait the second response if not yet here. */ - if (s2->status == REQ_WAITING) + if (s2->status < REQ_DONE) return false; bret->status = blkif_get_final_status(s->status, @@ -1495,7 +1506,7 @@ static bool blkif_completion(unsigned long *id, static irqreturn_t blkif_interrupt(int irq, void *dev_id) { struct request *req; - struct blkif_response *bret; + struct blkif_response bret; RING_IDX i, rp; unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; @@ -1506,54 +1517,76 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_lock_irqsave(&rinfo->ring_lock, flags); again: - rp = rinfo->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ + rp = READ_ONCE(rinfo->ring.sring->rsp_prod); + virt_rmb(); /* Ensure we see queued responses up to 'rp'. */ + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { + pr_alert("%s: illegal number of responses %u\n", + info->gd->disk_name, rp - rinfo->ring.rsp_cons); + goto err; + } for (i = rinfo->ring.rsp_cons; i != rp; i++) { unsigned long id; + unsigned int op; + + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); + id = bret.id; - bret = RING_GET_RESPONSE(&rinfo->ring, i); - id = bret->id; /* * The backend has messed up and given us an id that we would * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. */ if (id >= BLK_RING_SIZE(info)) { - WARN(1, "%s: response to %s has incorrect id (%ld)\n", - info->gd->disk_name, op_name(bret->operation), id); - /* We can't safely get the 'struct request' as - * the id is busted. */ - continue; + pr_alert("%s: response has incorrect id (%ld)\n", + info->gd->disk_name, id); + goto err; } + if (rinfo->shadow[id].status != REQ_WAITING) { + pr_alert("%s: response references no pending request\n", + info->gd->disk_name); + goto err; + } + + rinfo->shadow[id].status = REQ_PROCESSING; req = rinfo->shadow[id].request; - if (bret->operation != BLKIF_OP_DISCARD) { + op = rinfo->shadow[id].req.operation; + if (op == BLKIF_OP_INDIRECT) + op = rinfo->shadow[id].req.u.indirect.indirect_op; + if (bret.operation != op) { + pr_alert("%s: response has wrong operation (%u instead of %u)\n", + info->gd->disk_name, bret.operation, op); + goto err; + } + + if (bret.operation != BLKIF_OP_DISCARD) { /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, bret)) + if (!blkif_completion(&id, rinfo, &bret)) continue; } if (add_id_to_freelist(rinfo, id)) { WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", - info->gd->disk_name, op_name(bret->operation), id); + info->gd->disk_name, op_name(bret.operation), id); continue; } - if (bret->status == BLKIF_RSP_OKAY) + if (bret.status == BLKIF_RSP_OKAY) blkif_req(req)->error = BLK_STS_OK; else blkif_req(req)->error = BLK_STS_IOERR; - switch (bret->operation) { + switch (bret.operation) { case BLKIF_OP_DISCARD: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; @@ -1563,15 +1596,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } - if (unlikely(bret->status == BLKIF_RSP_ERROR && + if (unlikely(bret.status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { - printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + pr_warn_ratelimited("blkfront: %s: empty %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } if (unlikely(blkif_req(req)->error)) { @@ -1584,9 +1617,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) fallthrough; case BLKIF_OP_READ: case BLKIF_OP_WRITE: - if (unlikely(bret->status != BLKIF_RSP_OKAY)) - dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " - "request: %x\n", bret->status); + if (unlikely(bret.status != BLKIF_RSP_OKAY)) + dev_dbg_ratelimited(&info->xbdev->dev, + "Bad return from blkdev data request: %#x\n", + bret.status); break; default: @@ -1612,6 +1646,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); return IRQ_HANDLED; + + err: + info->connected = BLKIF_STATE_ERROR; + + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + + pr_alert("%s disabled for further use\n", info->gd->disk_name); + return IRQ_HANDLED; } diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index b7a8f3a1921f..427041c1e408 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -115,7 +115,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) struct xen_pci_op *active_op = &pdev->sh_info->op; unsigned long irq_flags; evtchn_port_t port = pdev->evtchn; - unsigned irq = pdev->irq; + unsigned int irq = pdev->irq; s64 ns, ns_timeout; spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); @@ -153,10 +153,10 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) } /* - * We might lose backend service request since we - * reuse same evtchn with pci_conf backend response. So re-schedule - * aer pcifront service. - */ + * We might lose backend service request since we + * reuse same evtchn with pci_conf backend response. So re-schedule + * aer pcifront service. + */ if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)) { dev_err(&pdev->xdev->dev, @@ -414,7 +414,8 @@ static int pcifront_scan_bus(struct pcifront_device *pdev, struct pci_dev *d; unsigned int devfn; - /* Scan the bus for functions and add. + /* + * Scan the bus for functions and add. * We omit handling of PCI bridge attachment because pciback prevents * bridges from being exported. */ @@ -492,8 +493,10 @@ static int pcifront_scan_root(struct pcifront_device *pdev, list_add(&bus_entry->list, &pdev->root_buses); - /* pci_scan_root_bus skips devices which do not have a - * devfn==0. The pcifront_scan_bus enumerates all devfn. */ + /* + * pci_scan_root_bus skips devices which do not have a + * devfn==0. The pcifront_scan_bus enumerates all devfn. + */ err = pcifront_scan_bus(pdev, domain, bus, b); /* Claim resources before going "live" with our devices */ @@ -651,8 +654,10 @@ static void pcifront_do_aer(struct work_struct *data) pci_channel_state_t state = (pci_channel_state_t)pdev->sh_info->aer_op.err; - /*If a pci_conf op is in progress, - we have to wait until it is done before service aer op*/ + /* + * If a pci_conf op is in progress, we have to wait until it is done + * before service aer op + */ dev_dbg(&pdev->xdev->dev, "pcifront service aer bus %x devfn %x\n", pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn); @@ -676,6 +681,7 @@ static void pcifront_do_aer(struct work_struct *data) static irqreturn_t pcifront_handler_aer(int irq, void *dev) { struct pcifront_device *pdev = dev; + schedule_pcifront_aer_op(pdev); return IRQ_HANDLED; } @@ -1027,6 +1033,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev) /* Find devices being detached and remove them. */ for (i = 0; i < num_devs; i++) { int l, state; + l = snprintf(str, sizeof(str), "state-%d", i); if (unlikely(l >= (sizeof(str) - 1))) { err = -ENOMEM; @@ -1078,7 +1085,7 @@ out: return err; } -static void __ref pcifront_backend_changed(struct xenbus_device *xdev, +static void pcifront_backend_changed(struct xenbus_device *xdev, enum xenbus_state be_state) { struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); @@ -1137,6 +1144,7 @@ out: static int pcifront_xenbus_remove(struct xenbus_device *xdev) { struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + if (pdev) free_pdev(pdev); diff --git a/drivers/xen/features.c b/drivers/xen/features.c index 25c053b09605..7b591443833c 100644 --- a/drivers/xen/features.c +++ b/drivers/xen/features.c @@ -9,13 +9,26 @@ #include <linux/types.h> #include <linux/cache.h> #include <linux/export.h> +#include <linux/printk.h> #include <asm/xen/hypercall.h> +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> #include <xen/features.h> +/* + * Linux kernel expects at least Xen 4.0. + * + * Assume some features to be available for that reason (depending on guest + * mode, of course). + */ +#define chk_required_feature(f) { \ + if (!xen_feature(f)) \ + panic("Xen: feature %s not available!\n", #f); \ + } + u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; EXPORT_SYMBOL_GPL(xen_features); @@ -31,4 +44,9 @@ void xen_setup_features(void) for (j = 0; j < 32; j++) xen_features[i * 32 + j] = !!(fi.submap & 1<<j); } + + if (xen_pv_domain()) { + chk_required_feature(XENFEAT_mmu_pt_update_preserve_ad); + chk_required_feature(XENFEAT_gnttab_map_avail_bits); + } } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index a3e7be96527d..1e7f6b1c0c97 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -266,20 +266,13 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) { struct gntdev_grant_map *map = data; unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; - int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte | + (1 << _GNTMAP_guest_avail0); u64 pte_maddr; BUG_ON(pgnr >= map->count); pte_maddr = arbitrary_virt_to_machine(pte).maddr; - /* - * Set the PTE as special to force get_user_pages_fast() fall - * back to the slow path. If this is not supported as part of - * the grant map, it will be done afterwards. - */ - if (xen_feature(XENFEAT_gnttab_map_avail_bits)) - flags |= (1 << _GNTMAP_guest_avail0); - gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, map->grants[pgnr].ref, map->grants[pgnr].domid); @@ -288,14 +281,6 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) return 0; } -#ifdef CONFIG_X86 -static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data) -{ - set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); - return 0; -} -#endif - int gntdev_map_grant_pages(struct gntdev_grant_map *map) { int i, err = 0; @@ -1055,23 +1040,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) err = vm_map_pages_zero(vma, map->pages, map->count); if (err) goto out_put_map; - } else { -#ifdef CONFIG_X86 - /* - * If the PTEs were not made special by the grant map - * hypercall, do so here. - * - * This is racy since the mapping is already visible - * to userspace but userspace should be well-behaved - * enough to not touch it until the mmap() call - * returns. - */ - if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) { - apply_to_page_range(vma->vm_mm, vma->vm_start, - vma->vm_end - vma->vm_start, - set_grant_ptes_as_special, NULL); - } -#endif } return 0; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 0cd728961fce..e8bed1cb76ba 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -542,8 +542,7 @@ static int __xenbus_map_ring(struct xenbus_device *dev, } } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j)) - BUG(); + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j)); *leaked = false; for (i = 0; i < j; i++) { @@ -581,8 +580,7 @@ static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles, gnttab_set_unmap_op(&unmap[i], vaddrs[i], GNTMAP_host_map, handles[i]); - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) - BUG(); + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)); err = GNTST_okay; for (i = 0; i < nr_handles; i++) { @@ -778,8 +776,7 @@ static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr) unmap[i].handle = node->handles[i]; } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) - BUG(); + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)); err = GNTST_okay; leaked = false; |