// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
+#include <linux/module.h>
#include <linux/vdpa.h>
+#include <linux/vringh.h>
+#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <linux/virtio_config.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
+#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
-#include <linux/mlx5/device.h>
-#include "mlx5_vnet.h"
-#include "mlx5_vdpa_ifc.h"
+#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include "mlx5_vdpa.h"
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox VDPA driver");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define to_mlx5_vdpa_ndev(__mvdev) \
+ container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
#define VALID_FEATURES_MASK \
mlx5_vdpa_info(mvdev, "%s\n", #_status); \
} while (0)
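+/* max_vqs counts individual RX/TX virtqueues; a virtio-net queue pair uses
+ * two of them, hence the halving below.
+ */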
+static inline u32 mlx5_vdpa_max_qps(int max_vqs)
+{
+ return max_vqs / 2;
+}
+
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
if (status & ~VALID_STATUS_MASK)
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
mlx5_cq_set_ci(&mvq->cq.mcq);
+
+ /* make sure CQ consumer update is visible to the hardware before updating
+ * RX doorbell record.
+ */
+ dma_wmb();
rx_post(&mvq->vqqp, num);
if (mvq->event_cb.callback)
mvq->event_cb.callback(mvq->event_cb.private);
}
}
-void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
+static int mlx5v_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
{
+ struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = madev->mdev;
struct virtio_net_config *config;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2 * mlx5_vdpa_max_qps(max_vqs));
if (IS_ERR(ndev))
- return ndev;
+ return PTR_ERR(ndev);
ndev->mvdev.max_vqs = max_vqs;
mvdev = &ndev->mvdev;
if (err)
goto err_reg;
- return ndev;
+ dev_set_drvdata(&adev->dev, ndev);
+ return 0;
err_reg:
free_resources(ndev);
err_mtu:
mutex_destroy(&ndev->reslock);
put_device(&mvdev->vdev.dev);
- return ERR_PTR(err);
+ return err;
}
-void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev)
+static void mlx5v_remove(struct auxiliary_device *adev)
{
+ struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(&adev->dev);
+
vdpa_unregister_device(&mvdev->vdev);
}
+
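+/* Match the vnet auxiliary device created by the mlx5 core driver
+ * (MLX5_ADEV_NAME ".vnet", i.e. "mlx5_core.vnet").
+ */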
+static const struct auxiliary_device_id mlx5v_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".vnet", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
+
+static struct auxiliary_driver mlx5v_driver = {
+ .name = "vnet",
+ .probe = mlx5v_probe,
+ .remove = mlx5v_remove,
+ .id_table = mlx5v_id_table,
+};
+
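+/* module_auxiliary_driver() generates the module init/exit boilerplate that
+ * registers and unregisters mlx5v_driver on the auxiliary bus.
+ */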
+module_auxiliary_driver(mlx5v_driver);
* so we should map it first. This is better than introducing a special
* case in page freeing fast path.
*/
- if (debug_pagealloc_enabled_static())
- kernel_map_pages(page, 1 << order, 1);
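+ /* debug_pagealloc_map_pages() checks debug_pagealloc_enabled_static()
+ * internally, so no explicit check is needed at the call site.
+ */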
+ debug_pagealloc_map_pages(page, 1 << order);
__free_pages_core(page, order);
totalram_pages_add(1UL << order);
#ifdef CONFIG_HIGHMEM
if (WARN_ON(PageLRU(page)))
isolate_lru_page(page);
if (page_mapped(page))
- try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
+ try_to_unmap(page, TTU_IGNORE_MLOCK);
continue;
}
}
node = zone_to_nid(zone);
+ /*
+ * Disable pcplists so that page isolation cannot race with freeing
+ * in a way that pages from isolated pageblock are left on pcplists.
+ */
+ zone_pcp_disable(zone);
+
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
MEMORY_OFFLINE | REPORT_FAILURE);
if (ret) {
reason = "failure to isolate range";
- goto failed_removal;
+ goto failed_removal_pcplists_disabled;
}
arg.start_pfn = start_pfn;
goto failed_removal_isolated;
}
- /*
- * per-cpu pages are drained in start_isolate_page_range, but if
- * there are still pages that are not free, make sure that we
- * drain again, because when we isolated range we might
- * have raced with another thread that was adding pages to pcp
- * list.
- *
- * Forward progress should be still guaranteed because
- * pages on the pcp list can only belong to MOVABLE_ZONE
- * because has_unmovable_pages explicitly checks for
- * PageBuddy on freed pages on other zones.
- */
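+ /*
+ * With pcplists disabled above, racing frees go straight to the buddy
+ * allocator, so there is no need to drain per-cpu lists before
+ * re-checking whether the range is fully isolated.
+ */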
ret = test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE);
- if (ret)
- drain_all_pages(zone);
+
} while (ret);
/* Mark all sections offline and remove free pages from the buddy. */
__offline_isolated_pages(start_pfn, end_pfn);
- pr_info("Offlined Pages %ld\n", nr_pages);
+ pr_debug("Offlined Pages %ld\n", nr_pages);
/*
* The memory sections are marked offline, and the pageblock flags
zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages;
spin_unlock_irqrestore(&zone->lock, flags);
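+ /* The range is offline; pcplists can operate normally again. */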
+ zone_pcp_enable(zone);
+
/* removal success */
adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
zone->present_pages -= nr_pages;
failed_removal_isolated:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
memory_notify(MEM_CANCEL_OFFLINE, &arg);
+failed_removal_pcplists_disabled:
+ zone_pcp_enable(zone);
failed_removal:
pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
(unsigned long long) start_pfn << PAGE_SHIFT,
}
EXPORT_SYMBOL_GPL(remove_memory);
+static int try_offline_memory_block(struct memory_block *mem, void *arg)
+{
+ uint8_t online_type = MMOP_ONLINE_KERNEL;
+ uint8_t **online_types = arg;
+ struct page *page;
+ int rc;
+
+ /*
+ * Sense the online_type via the zone of the memory block. Offlining
+ * with multiple zones within one memory block will be rejected
+ * by offlining code ... so we don't care about that.
+ */
+ page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
+ if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
+ online_type = MMOP_ONLINE_MOVABLE;
+
+ rc = device_offline(&mem->dev);
+ /*
+ * Default is MMOP_OFFLINE - change it only if offlining succeeded,
+ * so try_reonline_memory_block() can do the right thing.
+ */
+ if (!rc)
+ **online_types = online_type;
+
+ (*online_types)++;
+ /* Ignore if already offline. */
+ return rc < 0 ? rc : 0;
+}
+
+static int try_reonline_memory_block(struct memory_block *mem, void *arg)
+{
+ uint8_t **online_types = arg;
+ int rc;
+
+ if (**online_types != MMOP_OFFLINE) {
+ mem->online_type = **online_types;
+ rc = device_online(&mem->dev);
+ if (rc < 0)
+ pr_warn("%s: Failed to re-online memory: %d",
+ __func__, rc);
+ }
+
+ /* Continue processing all remaining memory blocks. */
+ (*online_types)++;
+ return 0;
+}
+
/*
- * Try to offline and remove a memory block. Might take a long time to
- * finish in case memory is still in use. Primarily useful for memory devices
- * that logically unplugged all memory (so it's no longer in use) and want to
- * offline + remove the memory block.
+ * Try to offline and remove memory. Might take a long time to finish in case
+ * memory is still in use. Primarily useful for memory devices that logically
+ * unplugged all memory (so it's no longer in use) and want to offline + remove
+ * that memory.
*/
int offline_and_remove_memory(int nid, u64 start, u64 size)
{
- struct memory_block *mem;
- int rc = -EINVAL;
+ const unsigned long mb_count = size / memory_block_size_bytes();
+ uint8_t *online_types, *tmp;
+ int rc;
if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
- size != memory_block_size_bytes())
- return rc;
+ !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
+ return -EINVAL;
+
+ /*
+ * We'll remember the old online type of each memory block, so we can
+ * try to revert whatever we did when offlining one memory block fails
+ * after offlining some others succeeded.
+ */
+ online_types = kmalloc_array(mb_count, sizeof(*online_types),
+ GFP_KERNEL);
+ if (!online_types)
+ return -ENOMEM;
+ /*
+ * Initialize all states to MMOP_OFFLINE, so when we abort processing in
+ * try_offline_memory_block(), we'll skip all unprocessed blocks in
+ * try_reonline_memory_block().
+ */
+ memset(online_types, MMOP_OFFLINE, mb_count);
lock_device_hotplug();
- mem = find_memory_block(__pfn_to_section(PFN_DOWN(start)));
- if (mem)
- rc = device_offline(&mem->dev);
- /* Ignore if the device is already offline. */
- if (rc > 0)
- rc = 0;
+
+ tmp = online_types;
+ rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
/*
- * In case we succeeded to offline the memory block, remove it.
+ * In case we succeeded to offline all memory, remove it.
* This cannot fail as it cannot get onlined in the meantime.
*/
if (!rc) {
rc = try_remove_memory(nid, start, size);
- WARN_ON_ONCE(rc);
+ if (rc)
+ pr_err("%s: Failed to remove memory: %d", __func__, rc);
+ }
+
+ /*
+ * Rollback what we did. While memory onlining might theoretically fail
+ * (nacked by a notifier), it barely ever happens.
+ */
+ if (rc) {
+ tmp = online_types;
+ walk_memory_blocks(start, size, &tmp,
+ try_reonline_memory_block);
}
unlock_device_hotplug();
+ kfree(online_types);
return rc;
}
EXPORT_SYMBOL_GPL(offline_and_remove_memory);