Merge branch 'for-5.7/libnvdimm' into libnvdimm-for-next

author Dan Williams <dan.j.williams@intel.com>

Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)

committer Dan Williams <dan.j.williams@intel.com>

Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)
author Dan Williams <dan.j.williams@intel.com>
Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)
diff --git a/MAINTAINERS b/MAINTAINERS

index a0d8649..6e65c79 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9569,6 +9569,7 @@ F:        drivers/acpi/nfit/*
  F:     include/linux/nd.h
  F:     include/linux/libnvdimm.h
  F:     include/uapi/linux/ndctl.h
+F:     tools/testing/nvdimm/
  
  LICENSES and SPDX stuff
  M:     Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig

index 497b7d0..e6ffe90 100644 (file)
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -122,6 +122,7 @@ config PPC
         select ARCH_HAS_GCOV_PROFILE_ALL
         select ARCH_HAS_KCOV
         select ARCH_HAS_HUGEPD                  if HUGETLB_PAGE
+       select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
         select ARCH_HAS_MMIOWB                  if PPC64
         select ARCH_HAS_PHYS_TO_DMA
         select ARCH_HAS_PMEM_API
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c

index fc66964..b1a0aeb 100644 (file)
--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -2,6 +2,7 @@
  
  #include <linux/io.h>
  #include <linux/slab.h>
+#include <linux/mmzone.h>
  #include <linux/vmalloc.h>
  #include <asm/io-workarounds.h>
  
@@ -97,3 +98,23 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
  
         return NULL;
  }
+
+#ifdef CONFIG_ZONE_DEVICE
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+ */
+unsigned long memremap_compat_align(void)
+{
+       unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+
+       if (radix_enabled())
+               return SUBSECTION_SIZE;
+       return max(SUBSECTION_SIZE, 1UL << shift);
+
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c

index 922a4fc..b642c4a 100644 (file)
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -285,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
         return 0;
  }
  
-static inline int papr_scm_node(int node)
-{
-       int min_dist = INT_MAX, dist;
-       int nid, min_node;
-
-       if ((node == NUMA_NO_NODE) || node_online(node))
-               return node;
-
-       min_node = first_online_node;
-       for_each_online_node(nid) {
-               dist = node_distance(node, nid);
-               if (dist < min_dist) {
-                       min_dist = dist;
-                       min_node = nid;
-               }
-       }
-       return min_node;
-}
-
  static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
  {
         struct device *dev = &p->pdev->dev;
@@ -328,7 +309,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
         }
  
         dimm_flags = 0;
-       set_bit(NDD_ALIASING, &dimm_flags);
+       set_bit(NDD_LABELING, &dimm_flags);
  
         p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
                                   PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
@@ -349,7 +330,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
  
         memset(&ndr_desc, 0, sizeof(ndr_desc));
         target_nid = dev_to_node(&p->pdev->dev);
-       online_nid = papr_scm_node(target_nid);
+       online_nid = numa_map_to_online_node(target_nid);
         ndr_desc.numa_node = online_nid;
         ndr_desc.target_node = target_nid;
         ndr_desc.res = &p->res;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index beea770..d4e446d 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1664,6 +1664,7 @@ config X86_PMEM_LEGACY
         depends on PHYS_ADDR_T_64BIT
         depends on BLK_DEV
         select X86_PMEM_LEGACY_DEVICE
+       select NUMA_KEEP_MEMINFO if NUMA
         select LIBNVDIMM
         help
           Treat memory marked using the non-standard e820 type of 12 as used
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c

index 99f7a68..59ba008 100644 (file)
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
  struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
  EXPORT_SYMBOL(node_data);
  
-static struct numa_meminfo numa_meminfo
-#ifndef CONFIG_MEMORY_HOTPLUG
-__initdata
-#endif
-;
+static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
+static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
  
  static int numa_distance_cnt;
  static u8 *numa_distance;
@@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
                 (mi->nr_blks - idx) * sizeof(mi->blk[0]));
  }
  
+/**
+ * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
+ * @dst: numa_meminfo to append block to
+ * @idx: Index of memblk to remove
+ * @src: numa_meminfo to remove memblk from
+ */
+static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
+                                        struct numa_meminfo *src)
+{
+       dst->blk[dst->nr_blks++] = src->blk[idx];
+       numa_remove_memblk_from(idx, src);
+}
+
  /**
   * numa_add_memblk - Add one numa_memblk to numa_meminfo
   * @nid: NUMA node ID of the new memblk
@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
         for (i = 0; i < mi->nr_blks; i++) {
                 struct numa_memblk *bi = &mi->blk[i];
  
-               /* make sure all blocks are inside the limits */
+               /* move / save reserved memory ranges */
+               if (!memblock_overlaps_region(&memblock.memory,
+                                       bi->start, bi->end - bi->start)) {
+                       numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
+                       continue;
+               }
+
+               /* make sure all non-reserved blocks are inside the limits */
                 bi->start = max(bi->start, low);
                 bi->end = min(bi->end, high);
  
-               /* and there's no empty or non-exist block */
-               if (bi->start >= bi->end ||
-                   !memblock_overlaps_region(&memblock.memory,
-                       bi->start, bi->end - bi->start))
+               /* and there's no empty block */
+               if (bi->start >= bi->end)
                         numa_remove_memblk_from(i--, mi);
         }
  
@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);
  
  #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
  
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 start)
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
  {
-       struct numa_meminfo *mi = &numa_meminfo;
-       int nid = mi->blk[0].nid;
         int i;
  
         for (i = 0; i < mi->nr_blks; i++)
                 if (mi->blk[i].start <= start && mi->blk[i].end > start)
-                       nid = mi->blk[i].nid;
+                       return mi->blk[i].nid;
+       return NUMA_NO_NODE;
+}
+
+int phys_to_target_node(phys_addr_t start)
+{
+       int nid = meminfo_to_nid(&numa_meminfo, start);
+
+       /*
+        * Prefer online nodes. On a miss, search the reserved ranges
+        * too, since reserved memory might be hot-added.
+        */
+       if (nid != NUMA_NO_NODE)
+               return nid;
+
+       return meminfo_to_nid(&numa_reserved_meminfo, start);
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+       int nid = meminfo_to_nid(&numa_meminfo, start);
+
+       if (nid == NUMA_NO_NODE)
+               nid = numa_meminfo.blk[0].nid;
         return nid;
  }
  EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c

index a3320f9..fa4500f 100644 (file)
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -360,7 +360,7 @@ static union acpi_object *acpi_label_info(acpi_handle handle)
  
  static u8 nfit_dsm_revid(unsigned family, unsigned func)
  {
-       static const u8 revid_table[NVDIMM_FAMILY_MAX+1][32] = {
+       static const u8 revid_table[NVDIMM_FAMILY_MAX+1][NVDIMM_CMD_MAX+1] = {
                 [NVDIMM_FAMILY_INTEL] = {
                         [NVDIMM_INTEL_GET_MODES] = 2,
                         [NVDIMM_INTEL_GET_FWINFO] = 2,
@@ -386,7 +386,7 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func)
  
         if (family > NVDIMM_FAMILY_MAX)
                 return 0;
-       if (func > 31)
+       if (func > NVDIMM_CMD_MAX)
                 return 0;
         id = revid_table[family][func];
         if (id == 0)
@@ -492,7 +492,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
          * Check for a valid command.  For ND_CMD_CALL, we also have to
          * make sure that the DSM function is supported.
          */
-       if (cmd == ND_CMD_CALL && !test_bit(func, &dsm_mask))
+       if (cmd == ND_CMD_CALL &&
+           (func > NVDIMM_CMD_MAX || !test_bit(func, &dsm_mask)))
                 return -ENOTTY;
         else if (!test_bit(cmd, &cmd_mask))
                 return -ENOTTY;
@@ -2026,8 +2027,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
                         continue;
                 }
  
-               if (nfit_mem->bdw && nfit_mem->memdev_pmem)
+               if (nfit_mem->bdw && nfit_mem->memdev_pmem) {
                         set_bit(NDD_ALIASING, &flags);
+                       set_bit(NDD_LABELING, &flags);
+               }
  
                 /* collate flags across all memdevs for this dimm */
                 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
@@ -3492,7 +3495,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
         if (nvdimm && cmd == ND_CMD_CALL &&
                         call_pkg->nd_family == NVDIMM_FAMILY_INTEL) {
                 func = call_pkg->nd_command;
-               if ((1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK)
+               if (func > NVDIMM_CMD_MAX ||
+                   (1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK)
                         return -EOPNOTSUPP;
         }
  
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h

index af09143..f5525f8 100644 (file)
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -34,6 +34,7 @@
                 | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
  
  #define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV
+#define NVDIMM_CMD_MAX 31
  
  #define NVDIMM_STANDARD_CMDMASK \
  (1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c

index eadbf90..47b4969 100644 (file)
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm)
  }
  EXPORT_SYMBOL(acpi_map_pxm_to_node);
  
-/**
- * acpi_map_pxm_to_online_node - Map proximity ID to online node
- * @pxm: ACPI proximity ID
- *
- * This is similar to acpi_map_pxm_to_node(), but always returns an online
- * node.  When the mapped node from a given proximity ID is offline, it
- * looks up the node distance table and returns the nearest online node.
- *
- * ACPI device drivers, which are called after the NUMA initialization has
- * completed in the kernel, can call this interface to obtain their device
- * NUMA topology from ACPI tables.  Such drivers do not have to deal with
- * offline nodes.  A node may be offline when a device proximity ID is
- * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
- * "numa=off" on x86.
- */
-int acpi_map_pxm_to_online_node(int pxm)
-{
-       int node, min_node;
-
-       node = acpi_map_pxm_to_node(pxm);
-
-       if (node == NUMA_NO_NODE)
-               node = 0;
-
-       min_node = node;
-       if (!node_online(node)) {
-               int min_dist = INT_MAX, dist, n;
-
-               for_each_online_node(n) {
-                       dist = node_distance(node, n);
-                       if (dist < min_dist) {
-                               min_dist = dist;
-                               min_node = n;
-                       }
-               }
-       }
-
-       return min_node;
-}
-EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
-
  static void __init
  acpi_table_print_srat_entry(struct acpi_subtable_header *header)
  {
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c

index a8b5159..09087c3 100644 (file)
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -1042,8 +1042,10 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
                         return -EFAULT;
         }
  
-       if (!desc || (desc->out_num + desc->in_num == 0) ||
-                       !test_bit(cmd, &cmd_mask))
+       if (!desc ||
+           (desc->out_num + desc->in_num == 0) ||
+           cmd > ND_CMD_CALL ||
+           !test_bit(cmd, &cmd_mask))
                 return -ENOTTY;
  
         /* fail write commands (when read-only) */
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c

index 64776ed..7d4ddc4 100644 (file)
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -99,7 +99,7 @@ static int nvdimm_probe(struct device *dev)
         if (ndd->ns_current >= 0) {
                 rc = nd_label_reserve_dpa(ndd);
                 if (rc == 0)
-                       nvdimm_set_aliasing(dev);
+                       nvdimm_set_labeling(dev);
         }
         nvdimm_bus_unlock(dev);
  
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c

index 94ea6db..b7b77e8 100644 (file)
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -32,7 +32,7 @@ int nvdimm_check_config_data(struct device *dev)
  
         if (!nvdimm->cmd_mask ||
             !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
-               if (test_bit(NDD_ALIASING, &nvdimm->flags))
+               if (test_bit(NDD_LABELING, &nvdimm->flags))
                         return -ENXIO;
                 else
                         return -ENOTTY;
@@ -173,11 +173,11 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
         return rc;
  }
  
-void nvdimm_set_aliasing(struct device *dev)
+void nvdimm_set_labeling(struct device *dev)
  {
         struct nvdimm *nvdimm = to_nvdimm(dev);
  
-       set_bit(NDD_ALIASING, &nvdimm->flags);
+       set_bit(NDD_LABELING, &nvdimm->flags);
  }
  
  void nvdimm_set_locked(struct device *dev)
@@ -312,8 +312,9 @@ static ssize_t flags_show(struct device *dev,
  {
         struct nvdimm *nvdimm = to_nvdimm(dev);
  
-       return sprintf(buf, "%s%s\n",
+       return sprintf(buf, "%s%s%s\n",
                         test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "",
+                       test_bit(NDD_LABELING, &nvdimm->flags) ? "label " : "",
                         test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : "");
  }
  static DEVICE_ATTR_RO(flags);
@@ -562,6 +563,21 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
         return rc;
  }
  
+static unsigned long dpa_align(struct nd_region *nd_region)
+{
+       struct device *dev = &nd_region->dev;
+
+       if (dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev),
+                               "bus lock required for capacity provision\n"))
+               return 0;
+       if (dev_WARN_ONCE(dev, !nd_region->ndr_mappings || nd_region->align
+                               % nd_region->ndr_mappings,
+                               "invalid region align %#lx mappings: %d\n",
+                               nd_region->align, nd_region->ndr_mappings))
+               return 0;
+       return nd_region->align / nd_region->ndr_mappings;
+}
+
  int alias_dpa_busy(struct device *dev, void *data)
  {
         resource_size_t map_end, blk_start, new;
@@ -570,6 +586,7 @@ int alias_dpa_busy(struct device *dev, void *data)
         struct nd_region *nd_region;
         struct nvdimm_drvdata *ndd;
         struct resource *res;
+       unsigned long align;
         int i;
  
         if (!is_memory(dev))
@@ -607,13 +624,21 @@ int alias_dpa_busy(struct device *dev, void *data)
          * Find the free dpa from the end of the last pmem allocation to
          * the end of the interleave-set mapping.
          */
+       align = dpa_align(nd_region);
+       if (!align)
+               return 0;
+
         for_each_dpa_resource(ndd, res) {
+               resource_size_t start, end;
+
                 if (strncmp(res->name, "pmem", 4) != 0)
                         continue;
-               if ((res->start >= blk_start && res->start < map_end)
-                               || (res->end >= blk_start
-                                       && res->end <= map_end)) {
-                       new = max(blk_start, min(map_end + 1, res->end + 1));
+
+               start = ALIGN_DOWN(res->start, align);
+               end = ALIGN(res->end + 1, align) - 1;
+               if ((start >= blk_start && start < map_end)
+                               || (end >= blk_start && end <= map_end)) {
+                       new = max(blk_start, min(map_end, end) + 1);
                         if (new != blk_start) {
                                 blk_start = new;
                                 goto retry;
@@ -653,6 +678,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
                 .res = NULL,
         };
         struct resource *res;
+       unsigned long align;
  
         if (!ndd)
                 return 0;
@@ -660,10 +686,20 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
         device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
  
         /* now account for busy blk allocations in unaliased dpa */
+       align = dpa_align(nd_region);
+       if (!align)
+               return 0;
         for_each_dpa_resource(ndd, res) {
+               resource_size_t start, end, size;
+
                 if (strncmp(res->name, "blk", 3) != 0)
                         continue;
-               info.available -= resource_size(res);
+               start = ALIGN_DOWN(res->start, align);
+               end = ALIGN(res->end + 1, align) - 1;
+               size = end - start + 1;
+               if (size >= info.available)
+                       return 0;
+               info.available -= size;
         }
  
         return info.available;
@@ -682,19 +718,31 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
         struct nvdimm_bus *nvdimm_bus;
         resource_size_t max = 0;
         struct resource *res;
+       unsigned long align;
  
         /* if a dimm is disabled the available capacity is zero */
         if (!ndd)
                 return 0;
  
+       align = dpa_align(nd_region);
+       if (!align)
+               return 0;
+
         nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
         if (__reserve_free_pmem(&nd_region->dev, nd_mapping->nvdimm))
                 return 0;
         for_each_dpa_resource(ndd, res) {
+               resource_size_t start, end;
+
                 if (strcmp(res->name, "pmem-reserve") != 0)
                         continue;
-               if (resource_size(res) > max)
-                       max = resource_size(res);
+               /* trim free space relative to current alignment setting */
+               start = ALIGN(res->start, align);
+               end = ALIGN_DOWN(res->end + 1, align) - 1;
+               if (end < start)
+                       continue;
+               if (end - start + 1 > max)
+                       max = end - start + 1;
         }
         release_free_pmem(nvdimm_bus, nd_mapping);
         return max;
@@ -722,24 +770,33 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
         struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
         struct resource *res;
         const char *reason;
+       unsigned long align;
  
         if (!ndd)
                 return 0;
  
+       align = dpa_align(nd_region);
+       if (!align)
+               return 0;
+
         map_start = nd_mapping->start;
         map_end = map_start + nd_mapping->size - 1;
         blk_start = max(map_start, map_end + 1 - *overlap);
         for_each_dpa_resource(ndd, res) {
-               if (res->start >= map_start && res->start < map_end) {
+               resource_size_t start, end;
+
+               start = ALIGN_DOWN(res->start, align);
+               end = ALIGN(res->end + 1, align) - 1;
+               if (start >= map_start && start < map_end) {
                         if (strncmp(res->name, "blk", 3) == 0)
                                 blk_start = min(blk_start,
-                                               max(map_start, res->start));
-                       else if (res->end > map_end) {
+                                               max(map_start, start));
+                       else if (end > map_end) {
                                 reason = "misaligned to iset";
                                 goto err;
                         } else
-                               busy += resource_size(res);
-               } else if (res->end >= map_start && res->end <= map_end) {
+                               busy += end - start + 1;
+               } else if (end >= map_start && end <= map_end) {
                         if (strncmp(res->name, "blk", 3) == 0) {
                                 /*
                                  * If a BLK allocation overlaps the start of
@@ -748,8 +805,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
                                  */
                                 blk_start = map_start;
                         } else
-                               busy += resource_size(res);
-               } else if (map_start > res->start && map_start < res->end) {
+                               busy += end - start + 1;
+               } else if (map_start > start && map_start < end) {
                         /* total eclipse of the mapping */
                         busy += nd_mapping->size;
                         blk_start = map_start;
@@ -759,7 +816,7 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
         *overlap = map_end + 1 - blk_start;
         available = blk_start - map_start;
         if (busy < available)
-               return available - busy;
+               return ALIGN_DOWN(available - busy, align);
         return 0;
  
   err:
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c

index e02f60a..4cd18be 100644 (file)
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -7,6 +7,7 @@
  #include <linux/memory_hotplug.h>
  #include <linux/libnvdimm.h>
  #include <linux/module.h>
+#include <linux/numa.h>
  
  static int e820_pmem_remove(struct platform_device *pdev)
  {
@@ -16,27 +17,16 @@ static int e820_pmem_remove(struct platform_device *pdev)
         return 0;
  }
  
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int e820_range_to_nid(resource_size_t addr)
-{
-       return memory_add_physaddr_to_nid(addr);
-}
-#else
-static int e820_range_to_nid(resource_size_t addr)
-{
-       return NUMA_NO_NODE;
-}
-#endif
-
  static int e820_register_one(struct resource *res, void *data)
  {
         struct nd_region_desc ndr_desc;
         struct nvdimm_bus *nvdimm_bus = data;
+       int nid = phys_to_target_node(res->start);
  
         memset(&ndr_desc, 0, sizeof(ndr_desc));
         ndr_desc.res = res;
-       ndr_desc.numa_node = e820_range_to_nid(res->start);
-       ndr_desc.target_node = ndr_desc.numa_node;
+       ndr_desc.numa_node = numa_map_to_online_node(nid);
+       ndr_desc.target_node = nid;
         set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
         if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
                 return -ENXIO;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c

index 032dc61..ae155e8 100644 (file)
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -10,6 +10,7 @@
  #include <linux/nd.h>
  #include "nd-core.h"
  #include "pmem.h"
+#include "pfn.h"
  #include "nd.h"
  
  static void namespace_io_release(struct device *dev)
@@ -541,6 +542,11 @@ static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
  {
         bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
         bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+       unsigned long align;
+
+       align = nd_region->align / nd_region->ndr_mappings;
+       valid->start = ALIGN(valid->start, align);
+       valid->end = ALIGN_DOWN(valid->end + 1, align) - 1;
  
         if (valid->start >= valid->end)
                 goto invalid;
@@ -980,10 +986,10 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
                 return -ENXIO;
         }
  
-       div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
+       div_u64_rem(val, nd_region->align, &remainder);
         if (remainder) {
                 dev_dbg(dev, "%llu is not %ldK aligned\n", val,
-                               (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
+                               nd_region->align / SZ_1K);
                 return -EINVAL;
         }
  
@@ -1739,6 +1745,22 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
                 return ERR_PTR(-ENODEV);
         }
  
+       /*
+        * Note, alignment validation for fsdax and devdax mode
+        * namespaces happens in nd_pfn_validate() where infoblock
+        * padding parameters can be applied.
+        */
+       if (pmem_should_map_pages(dev)) {
+               struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+               struct resource *res = &nsio->res;
+
+               if (!IS_ALIGNED(res->start | (res->end + 1),
+                                       memremap_compat_align())) {
+                       dev_err(&ndns->dev, "%pr misaligned, unable to map\n", res);
+                       return ERR_PTR(-EOPNOTSUPP);
+               }
+       }
+
         if (is_namespace_pmem(&ndns->dev)) {
                 struct nd_namespace_pmem *nspm;
  
@@ -2521,7 +2543,7 @@ static int init_active_labels(struct nd_region *nd_region)
                 if (!ndd) {
                         if (test_bit(NDD_LOCKED, &nvdimm->flags))
                                 /* fail, label data may be unreadable */;
-                       else if (test_bit(NDD_ALIASING, &nvdimm->flags))
+                       else if (test_bit(NDD_LABELING, &nvdimm->flags))
                                 /* fail, labels needed to disambiguate dpa */;
                         else
                                 return 0;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h

index 2cf77bc..85dbb2a 100644 (file)
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -146,6 +146,7 @@ struct nd_region {
         struct device *btt_seed;
         struct device *pfn_seed;
         struct device *dax_seed;
+       unsigned long align;
         u16 ndr_mappings;
         u64 ndr_size;
         u64 ndr_start;
@@ -252,7 +253,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
                 void *buf, size_t len);
  long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
                 unsigned int len);
-void nvdimm_set_aliasing(struct device *dev);
+void nvdimm_set_labeling(struct device *dev);
  void nvdimm_set_locked(struct device *dev);
  void nvdimm_clear_locked(struct device *dev);
  int nvdimm_security_setup_events(struct device *dev);
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h

index acb1951..37cb1b8 100644 (file)
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -24,6 +24,18 @@ struct nd_pfn_sb {
         __le64 npfns;
         __le32 mode;
         /* minor-version-1 additions for section alignment */
+       /**
+        * @start_pad: Deprecated attribute to pad start-misaligned namespaces
+        *
+        * start_pad is deprecated because the original definition did
+        * not comprehend that dataoff is relative to the base address
+        * of the namespace not the start_pad adjusted base. The result
+        * is that the dax path is broken, but the block-I/O path is
+        * not. The kernel will no longer create namespaces using start
+        * padding, but it still supports block-I/O for legacy
+        * configurations mainly to allow a backup, reconfigure the
+        * namespace, and restore flow to repair dax operation.
+        */
         __le32 start_pad;
         __le32 end_trunc;
         /* minor-version-2 record the base alignment of the mapping */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c

index b94f7a7..34db557 100644 (file)
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -446,6 +446,7 @@ static bool nd_supported_alignment(unsigned long align)
  int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
  {
         u64 checksum, offset;
+       struct resource *res;
         enum nd_pfn_mode mode;
         struct nd_namespace_io *nsio;
         unsigned long align, start_pad;
@@ -561,14 +562,14 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
                         dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
                                         nd_pfn->align, align, nd_pfn->mode,
                                         mode);
-                       return -EINVAL;
+                       return -EOPNOTSUPP;
                 }
         }
  
         if (align > nvdimm_namespace_capacity(ndns)) {
                 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
                                 align, nvdimm_namespace_capacity(ndns));
-               return -EINVAL;
+               return -EOPNOTSUPP;
         }
  
         /*
@@ -578,18 +579,31 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
          * established.
          */
         nsio = to_nd_namespace_io(&ndns->dev);
-       if (offset >= resource_size(&nsio->res)) {
+       res = &nsio->res;
+       if (offset >= resource_size(res)) {
                 dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
                                 dev_name(&ndns->dev));
-               return -EBUSY;
+               return -EOPNOTSUPP;
         }
  
-       if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
+       if ((align && !IS_ALIGNED(res->start + offset + start_pad, align))
                         || !IS_ALIGNED(offset, PAGE_SIZE)) {
                 dev_err(&nd_pfn->dev,
                                 "bad offset: %#llx dax disabled align: %#lx\n",
                                 offset, align);
-               return -ENXIO;
+               return -EOPNOTSUPP;
+       }
+
+       if (!IS_ALIGNED(res->start + le32_to_cpu(pfn_sb->start_pad),
+                               memremap_compat_align())) {
+               dev_err(&nd_pfn->dev, "resource start misaligned\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (!IS_ALIGNED(res->end + 1 - le32_to_cpu(pfn_sb->end_trunc),
+                               memremap_compat_align())) {
+               dev_err(&nd_pfn->dev, "resource end misaligned\n");
+               return -EOPNOTSUPP;
         }
  
         return 0;
@@ -750,7 +764,19 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
         start = nsio->res.start;
         size = resource_size(&nsio->res);
         npfns = PHYS_PFN(size - SZ_8K);
-       align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT));
+       align = max(nd_pfn->align, memremap_compat_align());
+
+       /*
+        * When @start is misaligned fail namespace creation. See
+        * the 'struct nd_pfn_sb' commentary on why ->start_pad is not
+        * an option.
+        */
+       if (!IS_ALIGNED(start, memremap_compat_align())) {
+               dev_err(&nd_pfn->dev, "%s: start %pa misaligned to %#lx\n",
+                               dev_name(&ndns->dev), &start,
+                               memremap_compat_align());
+               return -EINVAL;
+       }
         end_trunc = start + size - ALIGN_DOWN(start + size, align);
         if (nd_pfn->mode == PFN_MODE_PMEM) {
                 /*
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c

index a19e535..ccbb5b4 100644 (file)
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -195,16 +195,16 @@ EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
  int nd_region_to_nstype(struct nd_region *nd_region)
  {
         if (is_memory(&nd_region->dev)) {
-               u16 i, alias;
+               u16 i, label;
  
-               for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
+               for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
                         struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                         struct nvdimm *nvdimm = nd_mapping->nvdimm;
  
-                       if (test_bit(NDD_ALIASING, &nvdimm->flags))
-                               alias++;
+                       if (test_bit(NDD_LABELING, &nvdimm->flags))
+                               label++;
                 }
-               if (alias)
+               if (label)
                         return ND_DEVICE_NAMESPACE_PMEM;
                 else
                         return ND_DEVICE_NAMESPACE_IO;
@@ -216,21 +216,25 @@ int nd_region_to_nstype(struct nd_region *nd_region)
  }
  EXPORT_SYMBOL(nd_region_to_nstype);
  
-static ssize_t size_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static unsigned long long region_size(struct nd_region *nd_region)
  {
-       struct nd_region *nd_region = to_nd_region(dev);
-       unsigned long long size = 0;
-
-       if (is_memory(dev)) {
-               size = nd_region->ndr_size;
+       if (is_memory(&nd_region->dev)) {
+               return nd_region->ndr_size;
         } else if (nd_region->ndr_mappings == 1) {
                 struct nd_mapping *nd_mapping = &nd_region->mapping[0];
  
-               size = nd_mapping->size;
+               return nd_mapping->size;
         }
  
-       return sprintf(buf, "%llu\n", size);
+       return 0;
+}
+
+static ssize_t size_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_region *nd_region = to_nd_region(dev);
+
+       return sprintf(buf, "%llu\n", region_size(nd_region));
  }
  static DEVICE_ATTR_RO(size);
  
@@ -529,6 +533,54 @@ static ssize_t read_only_store(struct device *dev,
  }
  static DEVICE_ATTR_RW(read_only);
  
+static ssize_t align_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_region *nd_region = to_nd_region(dev);
+
+       return sprintf(buf, "%#lx\n", nd_region->align);
+}
+
+static ssize_t align_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct nd_region *nd_region = to_nd_region(dev);
+       unsigned long val, dpa;
+       u32 remainder;
+       int rc;
+
+       rc = kstrtoul(buf, 0, &val);
+       if (rc)
+               return rc;
+
+       if (!nd_region->ndr_mappings)
+               return -ENXIO;
+
+       /*
+        * Ensure space-align is evenly divisible by the region
+        * interleave-width because the kernel typically has no facility
+        * to determine which DIMM(s), dimm-physical-addresses, would
+        * contribute to the tail capacity in system-physical-address
+        * space for the namespace.
+        */
+       dpa = div_u64_rem(val, nd_region->ndr_mappings, &remainder);
+       if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
+                       || val > region_size(nd_region) || remainder)
+               return -EINVAL;
+
+       /*
+        * Given that space allocation consults this value multiple
+        * times ensure it does not change for the duration of the
+        * allocation.
+        */
+       nvdimm_bus_lock(dev);
+       nd_region->align = val;
+       nvdimm_bus_unlock(dev);
+
+       return len;
+}
+static DEVICE_ATTR_RW(align);
+
  static ssize_t region_badblocks_show(struct device *dev,
                 struct device_attribute *attr, char *buf)
  {
@@ -571,6 +623,7 @@ static DEVICE_ATTR_RO(persistence_domain);
  
  static struct attribute *nd_region_attributes[] = {
         &dev_attr_size.attr,
+       &dev_attr_align.attr,
         &dev_attr_nstype.attr,
         &dev_attr_mappings.attr,
         &dev_attr_btt_seed.attr,
@@ -626,6 +679,19 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
                 return a->mode;
         }
  
+       if (a == &dev_attr_align.attr) {
+               int i;
+
+               for (i = 0; i < nd_region->ndr_mappings; i++) {
+                       struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+                       struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+                       if (test_bit(NDD_LABELING, &nvdimm->flags))
+                               return a->mode;
+               }
+               return 0;
+       }
+
         if (a != &dev_attr_set_cookie.attr
                         && a != &dev_attr_available_size.attr)
                 return a->mode;
@@ -935,6 +1001,41 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
  }
  EXPORT_SYMBOL(nd_region_release_lane);
  
+/*
+ * PowerPC requires this alignment for memremap_pages(). All other archs
+ * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
+ */
+#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
+
+static unsigned long default_align(struct nd_region *nd_region)
+{
+       unsigned long align;
+       int i, mappings;
+       u32 remainder;
+
+       if (is_nd_blk(&nd_region->dev))
+               align = PAGE_SIZE;
+       else
+               align = MEMREMAP_COMPAT_ALIGN_MAX;
+
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+               if (test_bit(NDD_ALIASING, &nvdimm->flags)) {
+                       align = MEMREMAP_COMPAT_ALIGN_MAX;
+                       break;
+               }
+       }
+
+       mappings = max_t(u16, 1, nd_region->ndr_mappings);
+       div_u64_rem(align, mappings, &remainder);
+       if (remainder)
+               align *= mappings;
+
+       return align;
+}
+
  static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                 struct nd_region_desc *ndr_desc,
                 const struct device_type *dev_type, const char *caller)
@@ -1039,6 +1140,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
         dev->of_node = ndr_desc->of_node;
         nd_region->ndr_size = resource_size(ndr_desc->res);
         nd_region->ndr_start = ndr_desc->res->start;
+       nd_region->align = default_align(nd_region);
         if (ndr_desc->flush)
                 nd_region->flush = ndr_desc->flush;
         else
diff --git a/include/linux/acpi.h b/include/linux/acpi.h

index 0f24d70..3839363 100644 (file)
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -416,9 +416,30 @@ extern void acpi_osi_setup(char *str);
  extern bool acpi_osi_is_win8(void);
  
  #ifdef CONFIG_ACPI_NUMA
-int acpi_map_pxm_to_online_node(int pxm);
  int acpi_map_pxm_to_node(int pxm);
  int acpi_get_node(acpi_handle handle);
+
+/**
+ * acpi_map_pxm_to_online_node - Map proximity ID to online node
+ * @pxm: ACPI proximity ID
+ *
+ * This is similar to acpi_map_pxm_to_node(), but always returns an online
+ * node.  When the mapped node from a given proximity ID is offline, it
+ * looks up the node distance table and returns the nearest online node.
+ *
+ * ACPI device drivers, which are called after the NUMA initialization has
+ * completed in the kernel, can call this interface to obtain their device
+ * NUMA topology from ACPI tables.  Such drivers do not have to deal with
+ * offline nodes.  A node may be offline when a device proximity ID is
+ * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
+ * "numa=off" on x86.
+ */
+static inline int acpi_map_pxm_to_online_node(int pxm)
+{
+       int node = acpi_map_pxm_to_node(pxm);
+
+       return numa_map_to_online_node(node);
+}
  #else
  static inline int acpi_map_pxm_to_online_node(int pxm)
  {
diff --git a/include/linux/io.h b/include/linux/io.h

index b1c44bb..8394c56 100644 (file)
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -77,8 +77,6 @@ void *devm_memremap(struct device *dev, resource_size_t offset,
                 size_t size, unsigned long flags);
  void devm_memunmap(struct device *dev, void *addr);
  
-void *__devm_memremap_pages(struct device *dev, struct resource *res);
-
  #ifdef CONFIG_PCI
  /*
   * The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h

index 9df091b..18da405 100644 (file)
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -37,6 +37,8 @@ enum {
         NDD_WORK_PENDING = 4,
         /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */
         NDD_NOBLK = 5,
+       /* dimm supports namespace labels */
+       NDD_LABELING = 6,
  
         /* need to set a limit somewhere, but yes, this is likely overkill */
         ND_IOCTL_MAX_BUFLEN = SZ_4M,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h

index 6fefb09..8af1cbd 100644 (file)
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -132,6 +132,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
  
  unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
  void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
+unsigned long memremap_compat_align(void);
  #else
  static inline void *devm_memremap_pages(struct device *dev,
                 struct dev_pagemap *pgmap)
@@ -165,6 +166,12 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
                 unsigned long nr_pfns)
  {
  }
+
+/* when memremap_pages() is disabled all archs can remap a single page */
+static inline unsigned long memremap_compat_align(void)
+{
+       return PAGE_SIZE;
+}
  #endif /* CONFIG_ZONE_DEVICE */
  
  static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
@@ -172,4 +179,5 @@ static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
         if (pgmap)
                 percpu_ref_put(pgmap->ref);
  }
+
  #endif /* _LINUX_MEMREMAP_H_ */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 462f687..6b77f72 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1170,6 +1170,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
  #define SECTION_ALIGN_DOWN(pfn)        ((pfn) & PAGE_SECTION_MASK)
  
  #define SUBSECTION_SHIFT 21
+#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
  
  #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
  #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
diff --git a/include/linux/numa.h b/include/linux/numa.h

index 110b0e5..a42df80 100644 (file)
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -1,7 +1,7 @@
  /* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_NUMA_H
  #define _LINUX_NUMA_H
-
+#include <linux/types.h>
  
  #ifdef CONFIG_NODES_SHIFT
  #define NODES_SHIFT     CONFIG_NODES_SHIFT
@@ -13,4 +13,32 @@
  
  #define        NUMA_NO_NODE    (-1)
  
+/* optionally keep NUMA memory info available post init */
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+#define __initdata_or_meminfo
+#else
+#define __initdata_or_meminfo __initdata
+#endif
+
+#ifdef CONFIG_NUMA
+/* Generic implementation available */
+int numa_map_to_online_node(int node);
+
+/*
+ * Optional architecture specific implementation, users need a "depends
+ * on $ARCH"
+ */
+int phys_to_target_node(phys_addr_t addr);
+#else
+static inline int numa_map_to_online_node(int node)
+{
+       return NUMA_NO_NODE;
+}
+
+static inline int phys_to_target_node(phys_addr_t addr)
+{
+       return NUMA_NO_NODE;
+}
+#endif
+
  #endif /* _LINUX_NUMA_H */
diff --git a/lib/Kconfig b/lib/Kconfig

index bc7e563..5d53f96 100644 (file)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -615,6 +615,9 @@ config ARCH_HAS_PMEM_API
  config MEMREGION
         bool
  
+config ARCH_HAS_MEMREMAP_COMPAT_ALIGN
+       bool
+
  # use memcpy to implement user copies for nommu architectures
  config UACCESS_MEMCPY
         bool
diff --git a/mm/Kconfig b/mm/Kconfig

index ab80933..3282684 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -139,6 +139,10 @@ config HAVE_FAST_GUP
  config ARCH_KEEP_MEMBLOCK
         bool
  
+# Keep arch NUMA mapping infrastructure post-init.
+config NUMA_KEEP_MEMINFO
+       bool
+
  config MEMORY_ISOLATION
         bool
  
@@ -154,6 +158,7 @@ config MEMORY_HOTPLUG
         bool "Allow for memory hot-add"
         depends on SPARSEMEM || X86_64_ACPI_NUMA
         depends on ARCH_ENABLE_MEMORY_HOTPLUG
+       select NUMA_KEEP_MEMINFO if NUMA
  
  config MEMORY_HOTPLUG_SPARSE
         def_bool y
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 977c641..19f7e71 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -127,6 +127,32 @@ static struct mempolicy default_policy = {
  
  static struct mempolicy preferred_node_policy[MAX_NUMNODES];
  
+/**
+ * numa_map_to_online_node - Find closest online node
+ * @node: Node id to start the search
+ *
+ * Lookup the next closest node by distance if @node is not online.
+ */
+int numa_map_to_online_node(int node)
+{
+       int min_dist = INT_MAX, dist, n, min_node;
+
+       if (node == NUMA_NO_NODE || node_online(node))
+               return node;
+
+       min_node = node;
+       for_each_online_node(n) {
+               dist = node_distance(node, n);
+               if (dist < min_dist) {
+                       min_dist = dist;
+                       min_node = n;
+               }
+       }
+
+       return min_node;
+}
+EXPORT_SYMBOL_GPL(numa_map_to_online_node);
+
  struct mempolicy *get_task_policy(struct task_struct *p)
  {
         struct mempolicy *pol = p->mempolicy;
diff --git a/mm/memremap.c b/mm/memremap.c

index 09b5b7a..3e7afaf 100644 (file)
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -7,6 +7,7 @@
  #include <linux/mm.h>
  #include <linux/pfn_t.h>
  #include <linux/swap.h>
+#include <linux/mmzone.h>
  #include <linux/swapops.h>
  #include <linux/types.h>
  #include <linux/wait_bit.h>
@@ -14,6 +15,28 @@
  
  static DEFINE_XARRAY(pgmap_array);
  
+/*
+ * The memremap() and memremap_pages() interfaces are alternately used
+ * to map persistent memory namespaces. These interfaces place different
+ * constraints on the alignment and size of the mapping (namespace).
+ * memremap() can map individual PAGE_SIZE pages. memremap_pages() can
+ * only map subsections (2MB), and on at least one architecture (PowerPC)
+ * the minimum mapping granularity of memremap_pages() is 16MB.
+ *
+ * The role of memremap_compat_align() is to communicate the minimum
+ * arch supported alignment of a namespace such that it can freely
+ * switch modes without violating the arch constraint. Namely, do not
+ * allow a namespace to be PAGE_SIZE aligned since that namespace may be
+ * reconfigured into a mode that requires SUBSECTION_SIZE alignment.
+ */
+#ifndef CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN
+unsigned long memremap_compat_align(void)
+{
+       return SUBSECTION_SIZE;
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
+
  #ifdef CONFIG_DEV_PAGEMAP_OPS
  DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
  EXPORT_SYMBOL(devmap_managed_key);
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild

index dbebf05..47f9cc9 100644 (file)
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -21,8 +21,8 @@ DRIVERS := ../../../drivers
  NVDIMM_SRC := $(DRIVERS)/nvdimm
  ACPI_SRC := $(DRIVERS)/acpi/nfit
  DAX_SRC := $(DRIVERS)/dax
-ccflags-y := -I$(src)/$(NVDIMM_SRC)/
-ccflags-y += -I$(src)/$(ACPI_SRC)/
+ccflags-y := -I$(srctree)/drivers/nvdimm/
+ccflags-y += -I$(srctree)/drivers/acpi/nfit/
  
  obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
  obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild

index fb3c3d7..75baebf 100644 (file)
--- a/tools/testing/nvdimm/test/Kbuild
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -1,6 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
-ccflags-y := -I$(src)/../../../../drivers/nvdimm/
-ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
+ccflags-y := -I$(srctree)/drivers/nvdimm/
+ccflags-y += -I$(srctree)/drivers/acpi/nfit/
  
  obj-m += nfit_test.o
  obj-m += nfit_test_iomap.o
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c

index bf6422a..a8ee5c4 100644 (file)
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -3164,7 +3164,9 @@ static __init int nfit_test_init(void)
         mcsafe_test();
         dax_pmem_test();
         dax_pmem_core_test();
+#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
         dax_pmem_compat_test();
+#endif
  
         nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
author	Dan Williams <dan.j.williams@intel.com>
	Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)
committer	Dan Williams <dan.j.williams@intel.com>
	Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)
MAINTAINERS		patch \| blob \| history
arch/powerpc/Kconfig		patch \| blob \| history
arch/powerpc/mm/ioremap.c		patch \| blob \| history
arch/powerpc/platforms/pseries/papr_scm.c		patch \| blob \| history
arch/x86/Kconfig		patch \| blob \| history
arch/x86/mm/numa.c		patch \| blob \| history
drivers/acpi/nfit/core.c		patch \| blob \| history
drivers/acpi/nfit/nfit.h		patch \| blob \| history
drivers/acpi/numa/srat.c		patch \| blob \| history
drivers/nvdimm/bus.c		patch \| blob \| history
drivers/nvdimm/dimm.c		patch \| blob \| history
drivers/nvdimm/dimm_devs.c		patch \| blob \| history
drivers/nvdimm/e820.c		patch \| blob \| history
drivers/nvdimm/namespace_devs.c		patch \| blob \| history
drivers/nvdimm/nd.h		patch \| blob \| history
drivers/nvdimm/pfn.h		patch \| blob \| history
drivers/nvdimm/pfn_devs.c		patch \| blob \| history
drivers/nvdimm/region_devs.c		patch \| blob \| history
include/linux/acpi.h		patch \| blob \| history
include/linux/io.h		patch \| blob \| history
include/linux/libnvdimm.h		patch \| blob \| history
include/linux/memremap.h		patch \| blob \| history
include/linux/mmzone.h		patch \| blob \| history
include/linux/numa.h		patch \| blob \| history
lib/Kconfig		patch \| blob \| history
mm/Kconfig		patch \| blob \| history
mm/mempolicy.c		patch \| blob \| history
mm/memremap.c		patch \| blob \| history
tools/testing/nvdimm/Kbuild		patch \| blob \| history
tools/testing/nvdimm/test/Kbuild		patch \| blob \| history
tools/testing/nvdimm/test/nfit.c		patch \| blob \| history