Merge tag 'for-linus-5.15-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
author    Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 2 Sep 2021 20:20:11 +0000 (13:20 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 2 Sep 2021 20:20:11 +0000 (13:20 -0700)
Pull xen updates from Juergen Gross:

 - some small cleanups

 - a fix for a bug that could result in not all memory being
   transferred when migrating a Xen PV guest

 - a small series removing code supporting very old Xen hypervisor
   versions that nobody should have been using for many years now

 - a series for hardening the Xen block frontend driver

 - a fix for the Xen PV boot code issuing warning messages due to a
   stray preempt_disable() on the non-boot processors

* tag 'for-linus-5.15-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: remove stray preempt_disable() from PV AP startup code
  xen/pcifront: Removed unnecessary __ref annotation
  x86: xen: platform-pci-unplug: use pr_err() and pr_warn() instead of raw printk()
  drivers/xen/xenbus/xenbus_client.c: fix bugon.cocci warnings
  xen/blkfront: don't trust the backend response data blindly
  xen/blkfront: don't take local copy of a request from the ring page
  xen/blkfront: read response from backend only once
  xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests
  xen: assume XENFEAT_mmu_pt_update_preserve_ad being set for pv guests
  xen: check required Xen features
  xen: fix setting of max_pfn in shared_info

arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/p2m.c
arch/x86/xen/platform-pci-unplug.c
arch/x86/xen/smp_pv.c
drivers/block/xen-blkfront.c
drivers/pci/xen-pcifront.c
drivers/xen/features.c
drivers/xen/gntdev.c
drivers/xen/xenbus/xenbus_client.c

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 0314942..753f637 100644
@@ -116,9 +116,8 @@ static void __init xen_banner(void)
        HYPERVISOR_xen_version(XENVER_extraversion, &extra);
 
        pr_info("Booting paravirtualized kernel on %s\n", pv_info.name);
-       printk(KERN_INFO "Xen version: %d.%d%s%s\n",
-              version >> 16, version & 0xffff, extra.extraversion,
-              xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
+       pr_info("Xen version: %d.%d%s (preserve-AD)\n",
+               version >> 16, version & 0xffff, extra.extraversion);
 }
 
 static void __init xen_pv_init_platform(void)
@@ -1302,13 +1301,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
        xen_init_apic();
 #endif
 
-       if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
-               pv_ops.mmu.ptep_modify_prot_start =
-                       xen_ptep_modify_prot_start;
-               pv_ops.mmu.ptep_modify_prot_commit =
-                       xen_ptep_modify_prot_commit;
-       }
-
        machine_ops = xen_machine_ops;
 
        /*
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index ade789e..1df5f01 100644
@@ -2099,8 +2099,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
        .set_pte = xen_set_pte_init,
        .set_pmd = xen_set_pmd_hyper,
 
-       .ptep_modify_prot_start = __ptep_modify_prot_start,
-       .ptep_modify_prot_commit = __ptep_modify_prot_commit,
+       .ptep_modify_prot_start = xen_ptep_modify_prot_start,
+       .ptep_modify_prot_commit = xen_ptep_modify_prot_commit,
 
        .pte_val = PV_CALLEE_SAVE(xen_pte_val),
        .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index ac06ca3..5e6e236 100644
@@ -618,8 +618,8 @@ int xen_alloc_p2m_entry(unsigned long pfn)
        }
 
        /* Expanded the p2m? */
-       if (pfn > xen_p2m_last_pfn) {
-               xen_p2m_last_pfn = pfn;
+       if (pfn >= xen_p2m_last_pfn) {
+               xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE);
                HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
        }
 
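The arithmetic change above matters: despite its name, max_pfn in shared_info holds the highest used PFN plus one, and after expanding the p2m it should cover the last PFN representable by the just-allocated p2m frame. The old code skipped the update entirely when pfn equalled xen_p2m_last_pfn and otherwise stored the raw triggering PFN, leaving the Xen tools with a value up to one p2m page short during save/migration. A minimal sketch of the rounding (the ALIGN() shape matches the kernel's power-of-two macro; the sample values are illustrative):

#include <stdio.h>

/* Round x up to the next multiple of the power-of-two a,
 * as the kernel's ALIGN() from include/linux/align.h does. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

#define P2M_PER_PAGE 512	/* 4096-byte page / 8-byte p2m entry */

int main(void)
{
	unsigned long pfn = 1000;

	/* Before the fix: the raw PFN was stored. */
	printf("old value: %lu\n", pfn);                          /* 1000 */
	/* After: cover the whole newly allocated p2m frame. */
	printf("new value: %lu\n", ALIGN(pfn + 1, P2M_PER_PAGE)); /* 1024 */
	return 0;
}
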
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 96d7f7d..62ac489 100644
@@ -7,6 +7,8 @@
  * Copyright (c) 2010, Citrix
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/export.h>
@@ -30,13 +32,13 @@ static int check_platform_magic(void)
 
        magic = inw(XEN_IOPORT_MAGIC);
        if (magic != XEN_IOPORT_MAGIC_VAL) {
-               printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
+               pr_err("Xen Platform PCI: unrecognised magic value\n");
                return XEN_PLATFORM_ERR_MAGIC;
        }
 
        protocol = inb(XEN_IOPORT_PROTOVER);
 
-       printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
+       pr_debug("Xen Platform PCI: I/O protocol version %d\n",
                        protocol);
 
        switch (protocol) {
@@ -44,12 +46,12 @@ static int check_platform_magic(void)
                outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
                outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
                if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
-                       printk(KERN_ERR "Xen Platform: blacklisted by host\n");
+                       pr_err("Xen Platform: blacklisted by host\n");
                        return XEN_PLATFORM_ERR_BLACKLIST;
                }
                break;
        default:
-               printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version\n");
+               pr_warn("Xen Platform PCI: unknown I/O protocol version\n");
                return XEN_PLATFORM_ERR_PROTOCOL;
        }
 
@@ -155,12 +157,12 @@ void xen_unplug_emulated_devices(void)
         * been compiled for this kernel (modules or built-in are both OK). */
        if (!xen_emul_unplug) {
                if (xen_must_unplug_nics()) {
-                       printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
+                       pr_info("Netfront and the Xen platform PCI driver have "
                                        "been compiled for this kernel: unplug emulated NICs.\n");
                        xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
                }
                if (xen_must_unplug_disks()) {
-                       printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
+                       pr_info("Blkfront and the Xen platform PCI driver have "
                                        "been compiled for this kernel: unplug emulated disks.\n"
                                        "You might have to change the root device\n"
                                        "from /dev/hd[a-d] to /dev/xvd[a-d]\n"
@@ -200,7 +202,7 @@ static int __init parse_xen_emul_unplug(char *arg)
                else if (!strncmp(p, "never", l))
                        xen_emul_unplug |= XEN_UNPLUG_NEVER;
                else
-                       printk(KERN_WARNING "unrecognised option '%s' "
+                       pr_warn("unrecognised option '%s' "
                                 "in parameter 'xen_emul_unplug'\n", p);
        }
        return 0;
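
The pr_err()/pr_warn() conversions above pick up the pr_fmt prefix added at the top of the file, so every message is automatically tagged with the module name. A minimal sketch of the mechanism (kernel context assumed; message text illustrative):

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* must precede the includes */

#include <linux/printk.h>

static void example(void)
{
	/* Expands to roughly printk(KERN_ERR pr_fmt("...")), i.e. the
	 * module name gets prepended to the format string. */
	pr_err("unrecognised magic value\n");
}
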
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index c2ac319..96afadf 100644
@@ -64,7 +64,6 @@ static void cpu_bringup(void)
        cr4_init();
        cpu_init();
        touch_softlockup_watchdog();
-       preempt_disable();
 
        /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
        if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 715bfa8..7290210 100644
@@ -80,6 +80,7 @@ enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
        BLKIF_STATE_SUSPENDED,
+       BLKIF_STATE_ERROR,
 };
 
 struct grant {
@@ -89,6 +90,7 @@ struct grant {
 };
 
 enum blk_req_status {
+       REQ_PROCESSING,
        REQ_WAITING,
        REQ_DONE,
        REQ_ERROR,
@@ -530,10 +532,10 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
 
        id = get_id_from_freelist(rinfo);
        rinfo->shadow[id].request = req;
-       rinfo->shadow[id].status = REQ_WAITING;
+       rinfo->shadow[id].status = REQ_PROCESSING;
        rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
 
-       (*ring_req)->u.rw.id = id;
+       rinfo->shadow[id].req.u.rw.id = id;
 
        return id;
 }
@@ -541,11 +543,12 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
 static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
 {
        struct blkfront_info *info = rinfo->dev_info;
-       struct blkif_request *ring_req;
+       struct blkif_request *ring_req, *final_ring_req;
        unsigned long id;
 
        /* Fill out a communications ring structure. */
-       id = blkif_ring_get_request(rinfo, req, &ring_req);
+       id = blkif_ring_get_request(rinfo, req, &final_ring_req);
+       ring_req = &rinfo->shadow[id].req;
 
        ring_req->operation = BLKIF_OP_DISCARD;
        ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
@@ -556,8 +559,9 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf
        else
                ring_req->u.discard.flag = 0;
 
-       /* Keep a private copy so we can reissue requests when recovering. */
-       rinfo->shadow[id].req = *ring_req;
+       /* Copy the request to the ring page. */
+       *final_ring_req = *ring_req;
+       rinfo->shadow[id].status = REQ_WAITING;
 
        return 0;
 }
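
With this scheme the request is assembled entirely in the frontend-private shadow array; only the finished structure is copied to the shared ring, and the slot is marked REQ_WAITING only after that copy. The frontend therefore never reads request fields back from memory the backend can scribble on. A simplified sketch of the pattern (condensed from the driver code above, not its exact shape):

	unsigned long id;
	struct blkif_request *ring_req;

	/* Reserve a shadow slot and build the request in private memory. */
	id = get_id_from_freelist(rinfo);
	rinfo->shadow[id].status = REQ_PROCESSING;
	ring_req = &rinfo->shadow[id].req;

	ring_req->operation = BLKIF_OP_DISCARD;
	ring_req->u.rw.id = id;
	/* ... fill in the remaining request fields ... */

	/* Publish with a single struct copy; only then mark it in flight. */
	*RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt++) = *ring_req;
	rinfo->shadow[id].status = REQ_WAITING;
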
@@ -690,6 +694,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 {
        struct blkfront_info *info = rinfo->dev_info;
        struct blkif_request *ring_req, *extra_ring_req = NULL;
+       struct blkif_request *final_ring_req, *final_extra_ring_req = NULL;
        unsigned long id, extra_id = NO_ASSOCIATED_ID;
        bool require_extra_req = false;
        int i;
@@ -734,7 +739,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
        }
 
        /* Fill out a communications ring structure. */
-       id = blkif_ring_get_request(rinfo, req, &ring_req);
+       id = blkif_ring_get_request(rinfo, req, &final_ring_req);
+       ring_req = &rinfo->shadow[id].req;
 
        num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
        num_grant = 0;
@@ -785,7 +791,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
                ring_req->u.rw.nr_segments = num_grant;
                if (unlikely(require_extra_req)) {
                        extra_id = blkif_ring_get_request(rinfo, req,
-                                                         &extra_ring_req);
+                                                         &final_extra_ring_req);
+                       extra_ring_req = &rinfo->shadow[extra_id].req;
+
                        /*
                         * Only the first request contains the scatter-gather
                         * list.
@@ -827,10 +835,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
        if (setup.segments)
                kunmap_atomic(setup.segments);
 
-       /* Keep a private copy so we can reissue requests when recovering. */
-       rinfo->shadow[id].req = *ring_req;
-       if (unlikely(require_extra_req))
-               rinfo->shadow[extra_id].req = *extra_ring_req;
+       /* Copy request(s) to the ring page. */
+       *final_ring_req = *ring_req;
+       rinfo->shadow[id].status = REQ_WAITING;
+       if (unlikely(require_extra_req)) {
+               *final_extra_ring_req = *extra_ring_req;
+               rinfo->shadow[extra_id].status = REQ_WAITING;
+       }
 
        if (new_persistent_gnts)
                gnttab_free_grant_references(setup.gref_head);
@@ -1353,8 +1364,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp)
 static int blkif_get_final_status(enum blk_req_status s1,
                                  enum blk_req_status s2)
 {
-       BUG_ON(s1 == REQ_WAITING);
-       BUG_ON(s2 == REQ_WAITING);
+       BUG_ON(s1 < REQ_DONE);
+       BUG_ON(s2 < REQ_DONE);
 
        if (s1 == REQ_ERROR || s2 == REQ_ERROR)
                return BLKIF_RSP_ERROR;
@@ -1387,7 +1398,7 @@ static bool blkif_completion(unsigned long *id,
                s->status = blkif_rsp_to_req_status(bret->status);
 
                /* Wait the second response if not yet here. */
-               if (s2->status == REQ_WAITING)
+               if (s2->status < REQ_DONE)
                        return false;
 
                bret->status = blkif_get_final_status(s->status,
@@ -1495,7 +1506,7 @@ static bool blkif_completion(unsigned long *id,
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 {
        struct request *req;
-       struct blkif_response *bret;
+       struct blkif_response bret;
        RING_IDX i, rp;
        unsigned long flags;
        struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
@@ -1506,54 +1517,76 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
        spin_lock_irqsave(&rinfo->ring_lock, flags);
  again:
-       rp = rinfo->ring.sring->rsp_prod;
-       rmb(); /* Ensure we see queued responses up to 'rp'. */
+       rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
+       virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
+       if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
+               pr_alert("%s: illegal number of responses %u\n",
+                        info->gd->disk_name, rp - rinfo->ring.rsp_cons);
+               goto err;
+       }
 
        for (i = rinfo->ring.rsp_cons; i != rp; i++) {
                unsigned long id;
+               unsigned int op;
+
+               RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
+               id = bret.id;
 
-               bret = RING_GET_RESPONSE(&rinfo->ring, i);
-               id   = bret->id;
                /*
                 * The backend has messed up and given us an id that we would
                 * never have given to it (we stamp it up to BLK_RING_SIZE -
                 * look in get_id_from_freelist.
                 */
                if (id >= BLK_RING_SIZE(info)) {
-                       WARN(1, "%s: response to %s has incorrect id (%ld)\n",
-                            info->gd->disk_name, op_name(bret->operation), id);
-                       /* We can't safely get the 'struct request' as
-                        * the id is busted. */
-                       continue;
+                       pr_alert("%s: response has incorrect id (%ld)\n",
+                                info->gd->disk_name, id);
+                       goto err;
                }
+               if (rinfo->shadow[id].status != REQ_WAITING) {
+                       pr_alert("%s: response references no pending request\n",
+                                info->gd->disk_name);
+                       goto err;
+               }
+
+               rinfo->shadow[id].status = REQ_PROCESSING;
                req  = rinfo->shadow[id].request;
 
-               if (bret->operation != BLKIF_OP_DISCARD) {
+               op = rinfo->shadow[id].req.operation;
+               if (op == BLKIF_OP_INDIRECT)
+                       op = rinfo->shadow[id].req.u.indirect.indirect_op;
+               if (bret.operation != op) {
+                       pr_alert("%s: response has wrong operation (%u instead of %u)\n",
+                                info->gd->disk_name, bret.operation, op);
+                       goto err;
+               }
+
+               if (bret.operation != BLKIF_OP_DISCARD) {
                        /*
                         * We may need to wait for an extra response if the
                         * I/O request is split in 2
                         */
-                       if (!blkif_completion(&id, rinfo, bret))
+                       if (!blkif_completion(&id, rinfo, &bret))
                                continue;
                }
 
                if (add_id_to_freelist(rinfo, id)) {
                        WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
-                            info->gd->disk_name, op_name(bret->operation), id);
+                            info->gd->disk_name, op_name(bret.operation), id);
                        continue;
                }
 
-               if (bret->status == BLKIF_RSP_OKAY)
+               if (bret.status == BLKIF_RSP_OKAY)
                        blkif_req(req)->error = BLK_STS_OK;
                else
                        blkif_req(req)->error = BLK_STS_IOERR;
 
-               switch (bret->operation) {
+               switch (bret.operation) {
                case BLKIF_OP_DISCARD:
-                       if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+                       if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
                                struct request_queue *rq = info->rq;
-                               printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-                                          info->gd->disk_name, op_name(bret->operation));
+
+                               pr_warn_ratelimited("blkfront: %s: %s op failed\n",
+                                          info->gd->disk_name, op_name(bret.operation));
                                blkif_req(req)->error = BLK_STS_NOTSUPP;
                                info->feature_discard = 0;
                                info->feature_secdiscard = 0;
@@ -1563,15 +1596,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                        break;
                case BLKIF_OP_FLUSH_DISKCACHE:
                case BLKIF_OP_WRITE_BARRIER:
-                       if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
-                               printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-                                      info->gd->disk_name, op_name(bret->operation));
+                       if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
+                               pr_warn_ratelimited("blkfront: %s: %s op failed\n",
+                                      info->gd->disk_name, op_name(bret.operation));
                                blkif_req(req)->error = BLK_STS_NOTSUPP;
                        }
-                       if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+                       if (unlikely(bret.status == BLKIF_RSP_ERROR &&
                                     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
-                               printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
-                                      info->gd->disk_name, op_name(bret->operation));
+                               pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
+                                      info->gd->disk_name, op_name(bret.operation));
                                blkif_req(req)->error = BLK_STS_NOTSUPP;
                        }
                        if (unlikely(blkif_req(req)->error)) {
@@ -1584,9 +1617,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                        fallthrough;
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
-                       if (unlikely(bret->status != BLKIF_RSP_OKAY))
-                               dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
-                                       "request: %x\n", bret->status);
+                       if (unlikely(bret.status != BLKIF_RSP_OKAY))
+                               dev_dbg_ratelimited(&info->xbdev->dev,
+                                       "Bad return from blkdev data request: %#x\n",
+                                       bret.status);
 
                        break;
                default:
@@ -1612,6 +1646,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
        spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 
        return IRQ_HANDLED;
+
+ err:
+       info->connected = BLKIF_STATE_ERROR;
+
+       spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+
+       pr_alert("%s disabled for further use\n", info->gd->disk_name);
+       return IRQ_HANDLED;
 }
 
 
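The reworked interrupt handler snapshots each response into the on-stack bret via RING_COPY_RESPONSE() and validates only that private copy; with the old RING_GET_RESPONSE() pointer, a malicious or buggy backend could change id, operation or status between the check and the use. READ_ONCE() plus virt_rmb() likewise pins down a single read of rsp_prod, ordered before the response reads, and any inconsistency jumps to the new err label, which latches BLKIF_STATE_ERROR and retires the device rather than processing untrusted state. The producer sanity check uses RING_RESPONSE_PROD_OVERFLOW() from xen/interface/io/ring.h, which is essentially:

/* True if the backend advertises more outstanding responses than
 * the ring can hold, i.e. rsp_prod cannot be trusted. */
#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod) \
	(((_prod) - (_r)->rsp_cons) > RING_SIZE(_r))
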
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index b7a8f3a..427041c 100644
@@ -115,7 +115,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
        struct xen_pci_op *active_op = &pdev->sh_info->op;
        unsigned long irq_flags;
        evtchn_port_t port = pdev->evtchn;
-       unsigned irq = pdev->irq;
+       unsigned int irq = pdev->irq;
        s64 ns, ns_timeout;
 
        spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
@@ -153,10 +153,10 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
        }
 
        /*
-       * We might lose backend service request since we
-       * reuse same evtchn with pci_conf backend response. So re-schedule
-       * aer pcifront service.
-       */
+        * We might lose backend service request since we
+        * reuse same evtchn with pci_conf backend response. So re-schedule
+        * aer pcifront service.
+        */
        if (test_bit(_XEN_PCIB_active,
                        (unsigned long *)&pdev->sh_info->flags)) {
                dev_err(&pdev->xdev->dev,
@@ -414,7 +414,8 @@ static int pcifront_scan_bus(struct pcifront_device *pdev,
        struct pci_dev *d;
        unsigned int devfn;
 
-       /* Scan the bus for functions and add.
+       /*
+        * Scan the bus for functions and add.
         * We omit handling of PCI bridge attachment because pciback prevents
         * bridges from being exported.
         */
@@ -492,8 +493,10 @@ static int pcifront_scan_root(struct pcifront_device *pdev,
 
        list_add(&bus_entry->list, &pdev->root_buses);
 
-       /* pci_scan_root_bus skips devices which do not have a
-       * devfn==0. The pcifront_scan_bus enumerates all devfn. */
+       /*
+        * pci_scan_root_bus skips devices which do not have a
+        * devfn==0. The pcifront_scan_bus enumerates all devfn.
+        */
        err = pcifront_scan_bus(pdev, domain, bus, b);
 
        /* Claim resources before going "live" with our devices */
@@ -651,8 +654,10 @@ static void pcifront_do_aer(struct work_struct *data)
        pci_channel_state_t state =
                (pci_channel_state_t)pdev->sh_info->aer_op.err;
 
-       /*If a pci_conf op is in progress,
-               we have to wait until it is done before service aer op*/
+       /*
+        * If a pci_conf op is in progress, we have to wait until it is done
+        * before service aer op
+        */
        dev_dbg(&pdev->xdev->dev,
                "pcifront service aer bus %x devfn %x\n",
                pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
@@ -676,6 +681,7 @@ static void pcifront_do_aer(struct work_struct *data)
 static irqreturn_t pcifront_handler_aer(int irq, void *dev)
 {
        struct pcifront_device *pdev = dev;
+
        schedule_pcifront_aer_op(pdev);
        return IRQ_HANDLED;
 }
@@ -1027,6 +1033,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev)
        /* Find devices being detached and remove them. */
        for (i = 0; i < num_devs; i++) {
                int l, state;
+
                l = snprintf(str, sizeof(str), "state-%d", i);
                if (unlikely(l >= (sizeof(str) - 1))) {
                        err = -ENOMEM;
@@ -1078,7 +1085,7 @@ out:
        return err;
 }
 
-static void __ref pcifront_backend_changed(struct xenbus_device *xdev,
+static void pcifront_backend_changed(struct xenbus_device *xdev,
                                                  enum xenbus_state be_state)
 {
        struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
@@ -1137,6 +1144,7 @@ out:
 static int pcifront_xenbus_remove(struct xenbus_device *xdev)
 {
        struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
+
        if (pdev)
                free_pdev(pdev);
 
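Dropping __ref is correct because the annotation exists only to silence modpost section-mismatch warnings when non-init code deliberately references __init/__exit sections, and pcifront_backend_changed() references none. A sketch of the case the annotation is actually for (hypothetical function names):

static int __init one_time_setup(void)
{
	return 0;
}

/* __ref: the reference to __init code below is deliberate and
 * is only taken before init memory is freed. */
static int __ref boot_time_caller(void)
{
	return one_time_setup();
}
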
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 25c053b..7b59144 100644
@@ -9,13 +9,26 @@
 #include <linux/types.h>
 #include <linux/cache.h>
 #include <linux/export.h>
+#include <linux/printk.h>
 
 #include <asm/xen/hypercall.h>
 
+#include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
 #include <xen/features.h>
 
+/*
+ * Linux kernel expects at least Xen 4.0.
+ *
+ * Assume some features to be available for that reason (depending on guest
+ * mode, of course).
+ */
+#define chk_required_feature(f) {                                      \
+               if (!xen_feature(f))                                    \
+                       panic("Xen: feature %s not available!\n", #f);  \
+       }
+
 u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
 EXPORT_SYMBOL_GPL(xen_features);
 
@@ -31,4 +44,9 @@ void xen_setup_features(void)
                for (j = 0; j < 32; j++)
                        xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
        }
+
+       if (xen_pv_domain()) {
+               chk_required_feature(XENFEAT_mmu_pt_update_preserve_ad);
+               chk_required_feature(XENFEAT_gnttab_map_avail_bits);
+       }
 }
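
xen_setup_features() expands the XENVER_get_features submaps into one byte per feature precisely so that runtime checks stay trivial; with Xen 4.0 as the minimum supported hypervisor, PV guests can now treat the two checked features as guaranteed and panic at boot if they are missing. For reference, the accessor in include/xen/features.h is simply:

static inline int xen_feature(int flag)
{
	return xen_features[flag];
}
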
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a3e7be9..1e7f6b1 100644
@@ -266,20 +266,13 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 {
        struct gntdev_grant_map *map = data;
        unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
-       int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
+       int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
+                   (1 << _GNTMAP_guest_avail0);
        u64 pte_maddr;
 
        BUG_ON(pgnr >= map->count);
        pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 
-       /*
-        * Set the PTE as special to force get_user_pages_fast() fall
-        * back to the slow path.  If this is not supported as part of
-        * the grant map, it will be done afterwards.
-        */
-       if (xen_feature(XENFEAT_gnttab_map_avail_bits))
-               flags |= (1 << _GNTMAP_guest_avail0);
-
        gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
                          map->grants[pgnr].ref,
                          map->grants[pgnr].domid);
@@ -288,14 +281,6 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
        return 0;
 }
 
-#ifdef CONFIG_X86
-static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
-{
-       set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
-       return 0;
-}
-#endif
-
 int gntdev_map_grant_pages(struct gntdev_grant_map *map)
 {
        int i, err = 0;
@@ -1055,23 +1040,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
                err = vm_map_pages_zero(vma, map->pages, map->count);
                if (err)
                        goto out_put_map;
-       } else {
-#ifdef CONFIG_X86
-               /*
-                * If the PTEs were not made special by the grant map
-                * hypercall, do so here.
-                *
-                * This is racy since the mapping is already visible
-                * to userspace but userspace should be well-behaved
-                * enough to not touch it until the mmap() call
-                * returns.
-                */
-               if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) {
-                       apply_to_page_range(vma->vm_mm, vma->vm_start,
-                                           vma->vm_end - vma->vm_start,
-                                           set_grant_ptes_as_special, NULL);
-               }
-#endif
        }
 
        return 0;
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 0cd7289..e8bed1c 100644
@@ -542,8 +542,7 @@ static int __xenbus_map_ring(struct xenbus_device *dev,
                }
        }
 
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j))
-               BUG();
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j));
 
        *leaked = false;
        for (i = 0; i < j; i++) {
@@ -581,8 +580,7 @@ static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
                gnttab_set_unmap_op(&unmap[i], vaddrs[i],
                                    GNTMAP_host_map, handles[i]);
 
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
-               BUG();
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
 
        err = GNTST_okay;
        for (i = 0; i < nr_handles; i++) {
@@ -778,8 +776,7 @@ static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
                unmap[i].handle = node->handles[i];
        }
 
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
-               BUG();
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
 
        err = GNTST_okay;
        leaked = false;
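
Folding the if (...) BUG(); pattern into BUG_ON() is safe even though the condition issues a hypercall: the asm-generic fallback evaluates the condition whether or not CONFIG_BUG is enabled, so the unmap operation is still performed. The generic shape in include/asm-generic/bug.h is roughly:

#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)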