dax/kmem: use a single static memory group for a single probed unit
author David Hildenbrand <david@redhat.com>
Wed, 8 Sep 2021 02:55:37 +0000 (19:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 8 Sep 2021 18:50:23 +0000 (11:50 -0700)
Although dax/kmem users often disable auto-onlining and instead online
memory manually (usually to ZONE_MOVABLE), there is still value in having
auto-onlining be aware of the relationship of memory blocks.

Let's treat one probed unit as a single static memory device, similar to a
single ACPI memory device.

Link: https://lkml.kernel.org/r/20210806124715.17090-7-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hui Zhu <teawater@gmail.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Marek Kedzierski <mkedzier@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
drivers/dax/kmem.c

index 99e0f60..a376220 100644 (file)
@@ -37,15 +37,16 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
 
 struct dax_kmem_data {
        const char *res_name;
+       int mgid;
        struct resource *res[];
 };
 
 static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 {
        struct device *dev = &dev_dax->dev;
+       unsigned long total_len = 0;
        struct dax_kmem_data *data;
-       int rc = -ENOMEM;
-       int i, mapped = 0;
+       int i, rc, mapped = 0;
        int numa_node;
 
        /*
@@ -61,24 +62,44 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
                return -EINVAL;
        }
 
+       for (i = 0; i < dev_dax->nr_range; i++) {
+               struct range range;
+
+               rc = dax_kmem_range(dev_dax, i, &range);
+               if (rc) {
+                       dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
+                                       i, range.start, range.end);
+                       continue;
+               }
+               total_len += range_len(&range);
+       }
+
+       if (!total_len) {
+               dev_warn(dev, "rejecting DAX region without any memory after alignment\n");
+               return -EINVAL;
+       }
+
        data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
+       rc = -ENOMEM;
        data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
        if (!data->res_name)
                goto err_res_name;
 
+       rc = memory_group_register_static(numa_node, total_len);
+       if (rc < 0)
+               goto err_reg_mgid;
+       data->mgid = rc;
+
        for (i = 0; i < dev_dax->nr_range; i++) {
                struct resource *res;
                struct range range;
 
                rc = dax_kmem_range(dev_dax, i, &range);
-               if (rc) {
-                       dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
-                                       i, range.start, range.end);
+               if (rc)
                        continue;
-               }
 
                /* Region is permanently reserved if hotremove fails. */
                res = request_mem_region(range.start, range_len(&range), data->res_name);
@@ -108,8 +129,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
                 * Ensure that future kexec'd kernels will not treat
                 * this as RAM automatically.
                 */
-               rc = add_memory_driver_managed(numa_node, range.start,
-                               range_len(&range), kmem_name, MHP_NONE);
+               rc = add_memory_driver_managed(data->mgid, range.start,
+                               range_len(&range), kmem_name, MHP_NID_IS_MGID);
 
                if (rc) {
                        dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
@@ -129,6 +150,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
        return 0;
 
 err_request_mem:
+       memory_group_unregister(data->mgid);
+err_reg_mgid:
        kfree(data->res_name);
 err_res_name:
        kfree(data);
@@ -171,6 +194,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
        }
 
        if (success >= dev_dax->nr_range) {
+               memory_group_unregister(data->mgid);
                kfree(data->res_name);
                kfree(data);
                dev_set_drvdata(dev, NULL);