Merge tag 'cxl-for-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2022 01:07:03 +0000 (18:07 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2022 01:07:03 +0000 (18:07 -0700)
Pull CXL (Compute Express Link) updates from Dan Williams:
 "This development cycle extends the subsystem to discover CXL resources
  throughout a CXL/PCIe switch topology and respond to hot add/remove
  events anywhere in that topology.

  This is more foundational infrastructure in preparation for dynamic
  memory region provisioning support. Recall that CXL memory regions, as
  the new "Theory of Operation" section of
  Documentation/driver-api/cxl/memory-devices.rst describes, bring
  storage volume striping semantics to memory.

  The hot add/remove behavior is validated with extensions to the
  cxl_test unit test environment and this test in the cxl-cli test
  suite:

      https://github.com/pmem/ndctl/blob/djbw/for-74/cxl/test/cxl-topology.sh

  Summary:

   - Add a driver for 'struct cxl_memdev' objects responsible for
     CXL.mem operation as distinct from 'cxl_pci' mailbox operations.

     Its primary responsibility is enumerating an endpoint 'struct
     cxl_port' and all the 'struct cxl_port' instances between an
     endpoint and the CXL platform root.

   - Add a driver for 'struct cxl_port' objects responsible for
     enumerating and operating all Host-managed Device Memory (HDM)
     decoder resources between the platform-level CXL memory
     description, all intervening host bridges / switches, and the HDM
     resources in endpoints.

   - Update the cxl_pci driver to validate precursors to HDM decoder
     operation, like ready-polling and legacy CXL 1.1 DVSEC-based CXL.mem
     configuration.

   - Add basic lockdep coverage for usage of device_lock() on CXL
     subsystem objects similar to what exists for LIBNVDIMM. Include a
     compile-time switch for which subsystem to validate at run-time.

   - Update cxl_test to emulate a one-level switch topology.

   - Document a "Theory of Operation" for the subsystem.

   - Add 'numa_node' and 'serial' attributes to cxl_memdev sysfs.

   - Include miscellaneous fixes for spec / QEMU CXL emulation
     compatibility and static analysis reports"

* tag 'cxl-for-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (48 commits)
  cxl/core/port: Fix NULL but dereferenced coccicheck error
  cxl/port: Hold port reference until decoder release
  cxl/port: Fix endpoint refcount leak
  cxl/core: Fix cxl_device_lock() class detection
  cxl/core/port: Fix unregister_port() lock assertion
  cxl/regs: Fix size of CXL Capability Header Register
  cxl/core/port: Handle invalid decoders
  cxl/core/port: Fix / relax decoder target enumeration
  tools/testing/cxl: Add a physical_node link
  tools/testing/cxl: Enumerate mock decoders
  tools/testing/cxl: Mock one level of switches
  tools/testing/cxl: Fix root port to host bridge assignment
  tools/testing/cxl: Mock dvsec_ranges()
  cxl/core/port: Add endpoint decoders
  cxl/core: Move target_list out of base decoder attributes
  cxl/mem: Add the cxl_mem driver
  cxl/core/port: Add switch port enumeration
  cxl/memdev: Add numa_node attribute
  cxl/pci: Emit device serial number
  cxl/pci: Implement wait for media active
  ...

32 files changed:
Documentation/ABI/testing/sysfs-bus-cxl
Documentation/driver-api/cxl/memory-devices.rst
drivers/cxl/Kconfig
drivers/cxl/Makefile
drivers/cxl/acpi.c
drivers/cxl/core/Makefile
drivers/cxl/core/bus.c [deleted file]
drivers/cxl/core/core.h
drivers/cxl/core/hdm.c [new file with mode: 0644]
drivers/cxl/core/memdev.c
drivers/cxl/core/pci.c [new file with mode: 0644]
drivers/cxl/core/pmem.c
drivers/cxl/core/port.c [new file with mode: 0644]
drivers/cxl/core/regs.c
drivers/cxl/cxl.h
drivers/cxl/cxlmem.h
drivers/cxl/cxlpci.h [new file with mode: 0644]
drivers/cxl/mem.c [new file with mode: 0644]
drivers/cxl/pci.c
drivers/cxl/pci.h [deleted file]
drivers/cxl/pmem.c
drivers/cxl/port.c [new file with mode: 0644]
drivers/nvdimm/nd-core.h
lib/Kconfig.debug
tools/testing/cxl/Kbuild
tools/testing/cxl/mock_acpi.c
tools/testing/cxl/mock_mem.c [new file with mode: 0644]
tools/testing/cxl/mock_pmem.c [deleted file]
tools/testing/cxl/test/cxl.c
tools/testing/cxl/test/mem.c
tools/testing/cxl/test/mock.c
tools/testing/cxl/test/mock.h

index 0b6a2e6..7c2b846 100644 (file)
@@ -1,3 +1,12 @@
+What:          /sys/bus/cxl/flush
+Date:          January, 2022
+KernelVersion: v5.18
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (WO) If userspace manually unbinds a port, the kernel schedules
+               all descendant memdevs for unbind. Writing '1' to this attribute
+               flushes that work.
+
 What:          /sys/bus/cxl/devices/memX/firmware_version
 Date:          December, 2020
 KernelVersion: v5.12
@@ -25,6 +34,24 @@ Description:
                identically named field in the Identify Memory Device Output
                Payload in the CXL-2.0 specification.
 
+What:          /sys/bus/cxl/devices/memX/serial
+Date:          January, 2022
+KernelVersion: v5.18
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) 64-bit serial number per the PCIe Device Serial Number
+               capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
+               Memory Device PCIe Capabilities and Extended Capabilities.
+
+What:          /sys/bus/cxl/devices/memX/numa_node
+Date:          January, 2022
+KernelVersion: v5.18
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) If NUMA is enabled and the platform has affinitized the
+               host PCI device for this memory device, emit the CPU node
+               affinity for this device.
+
 What:          /sys/bus/cxl/devices/*/devtype
 Date:          June, 2021
 KernelVersion: v5.14
@@ -34,6 +61,15 @@ Description:
                the same value communicated in the DEVTYPE environment variable
                for uevents for devices on the "cxl" bus.
 
+What:          /sys/bus/cxl/devices/*/modalias
+Date:          December, 2021
+KernelVersion: v5.18
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               CXL device objects export the modalias attribute which mirrors
+               the same value communicated in the MODALIAS environment variable
+               for uevents for devices on the "cxl" bus.
+
 What:          /sys/bus/cxl/devices/portX/uport
 Date:          June, 2021
 KernelVersion: v5.14
index 3b8f413..db476bb 100644 (file)
@@ -14,6 +14,303 @@ that optionally define a device's contribution to an interleaved address
 range across multiple devices underneath a host-bridge or interleaved
 across host-bridges.
 
+CXL Bus: Theory of Operation
+============================
+Similar to how a RAID driver takes disk objects and assembles them into a new
+logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and
+assemble them into a CXL.mem decode topology. The need for runtime configuration
+of the CXL.mem topology is also similar to RAID in that different environments
+with the same hardware configuration may decide to assemble the topology in
+contrasting ways. One may choose performance (RAID0) striping memory across
+multiple Host Bridges and endpoints while another may opt for fault tolerance
+and disable any striping in the CXL.mem topology.
+
+Platform firmware enumerates a menu of interleave options at the "CXL root port"
+(Linux term for the top of the CXL decode topology). From there, PCIe topology
+dictates which endpoints can participate in which Host Bridge decode regimes.
+Each PCIe Switch in the path between the root and an endpoint introduces a point
+at which the interleave can be split. For example, platform firmware may say a
+given range only decodes to one Host Bridge, but that Host Bridge may in turn
+interleave cycles across multiple Root Ports. An intervening Switch between a
+port and an endpoint may interleave cycles across multiple Downstream Switch
+Ports, etc.
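+
+As an aside, the target selection that such an interleave implies can be
+modeled with simple modulo arithmetic over the host physical address. The
+following is an illustrative sketch only (not kernel code, all names
+hypothetical), assuming a base-aligned window and power-of-2 parameters::
+
+    /* Which interleave target claims a given host physical address? */
+    static int interleave_target(u64 hpa, u64 base, int ways, int granularity)
+    {
+            u64 chunk = (hpa - base) / granularity; /* stripe index */
+
+            return chunk % ways; /* targets cycle round-robin */
+    }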
+
+Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test'
+module generates an emulated CXL topology of 2 Host Bridges each with 2 Root
+Ports. Each of those Root Ports is connected to a 2-way switch, with endpoints
+connected to the downstream ports, for a total of 8 endpoints::
+
+    # cxl list -BEMPu -b cxl_test
+    {
+      "bus":"root3",
+      "provider":"cxl_test",
+      "ports:root3":[
+        {
+          "port":"port5",
+          "host":"cxl_host_bridge.1",
+          "ports:port5":[
+            {
+              "port":"port8",
+              "host":"cxl_switch_uport.1",
+              "endpoints:port8":[
+                {
+                  "endpoint":"endpoint9",
+                  "host":"mem2",
+                  "memdev":{
+                    "memdev":"mem2",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x1",
+                    "numa_node":1,
+                    "host":"cxl_mem.1"
+                  }
+                },
+                {
+                  "endpoint":"endpoint15",
+                  "host":"mem6",
+                  "memdev":{
+                    "memdev":"mem6",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x5",
+                    "numa_node":1,
+                    "host":"cxl_mem.5"
+                  }
+                }
+              ]
+            },
+            {
+              "port":"port12",
+              "host":"cxl_switch_uport.3",
+              "endpoints:port12":[
+                {
+                  "endpoint":"endpoint17",
+                  "host":"mem8",
+                  "memdev":{
+                    "memdev":"mem8",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x7",
+                    "numa_node":1,
+                    "host":"cxl_mem.7"
+                  }
+                },
+                {
+                  "endpoint":"endpoint13",
+                  "host":"mem4",
+                  "memdev":{
+                    "memdev":"mem4",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x3",
+                    "numa_node":1,
+                    "host":"cxl_mem.3"
+                  }
+                }
+              ]
+            }
+          ]
+        },
+        {
+          "port":"port4",
+          "host":"cxl_host_bridge.0",
+          "ports:port4":[
+            {
+              "port":"port6",
+              "host":"cxl_switch_uport.0",
+              "endpoints:port6":[
+                {
+                  "endpoint":"endpoint7",
+                  "host":"mem1",
+                  "memdev":{
+                    "memdev":"mem1",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0",
+                    "numa_node":0,
+                    "host":"cxl_mem.0"
+                  }
+                },
+                {
+                  "endpoint":"endpoint14",
+                  "host":"mem5",
+                  "memdev":{
+                    "memdev":"mem5",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x4",
+                    "numa_node":0,
+                    "host":"cxl_mem.4"
+                  }
+                }
+              ]
+            },
+            {
+              "port":"port10",
+              "host":"cxl_switch_uport.2",
+              "endpoints:port10":[
+                {
+                  "endpoint":"endpoint16",
+                  "host":"mem7",
+                  "memdev":{
+                    "memdev":"mem7",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x6",
+                    "numa_node":0,
+                    "host":"cxl_mem.6"
+                  }
+                },
+                {
+                  "endpoint":"endpoint11",
+                  "host":"mem3",
+                  "memdev":{
+                    "memdev":"mem3",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x2",
+                    "numa_node":0,
+                    "host":"cxl_mem.2"
+                  }
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+
+In that listing each "root", "port", and "endpoint" object corresponds to a kernel
+'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to
+its descendants. So "root" claims non-PCIe enumerable platform decode ranges and
+decodes them to "ports", "ports" decode to "endpoints", and "endpoints"
+represent the decode from SPA (System Physical Address) to DPA (Device Physical
+Address).
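+
+For a flavor of the math behind that SPA to DPA decode, here is a hedged model
+(again illustrative, not a kernel symbol) of where an SPA lands in one
+endpoint's DPA space, assuming a base-aligned window and power-of-2
+parameters::
+
+    /* Offset of a system physical address within one endpoint's DPA space */
+    static u64 spa_to_dpa_offset(u64 spa, u64 base, int ways, int granularity)
+    {
+            u64 offset = spa - base;
+
+            /*
+             * One pass across all 'ways' targets covers ways * granularity
+             * bytes of SPA, but only 'granularity' bytes of each device.
+             */
+            return (offset / ((u64)ways * granularity)) * granularity +
+                   (offset % granularity);
+    }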
+
+Continuing the RAID analogy, disks have both topology metadata and on-device
+metadata that determine RAID set assembly. CXL Port topology and CXL Port link
+status are the metadata for CXL.mem set assembly. The CXL Port topology is
+enumerated by the arrival of a CXL.mem device. That is, unless and until the
+PCIe core attaches the cxl_pci driver to a CXL Memory Expander, there is no
+role for CXL Port objects. Conversely, for hot-unplug / removal scenarios,
+there is no need for the Linux PCI core to tear down switch-level CXL resources
+because the endpoint
+->remove() event cleans up the port data that was established to support that
+Memory Expander.
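+
+The drivers implementing this lifecycle attach via the cxl bus. A minimal
+sketch of that registration shape (the driver below is hypothetical;
+'struct cxl_driver' and module_cxl_driver() are the real interfaces, and the
+new cxl_port driver follows this pattern)::
+
+    static int my_port_probe(struct device *dev)
+    {
+            /* bind-time enumeration work for a matched cxl_port */
+            return 0;
+    }
+
+    static struct cxl_driver my_port_driver = {
+            .name = "my_port_driver",
+            .probe = my_port_probe,
+            .id = CXL_DEVICE_PORT,
+    };
+    module_cxl_driver(my_port_driver);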
+
+The port metadata and potential decode schemes that a given memory device may
+participate in can be determined via a command like::
+
+    # cxl list -BDMu -d root -m mem3
+    {
+      "bus":"root3",
+      "provider":"cxl_test",
+      "decoders:root3":[
+        {
+          "decoder":"decoder3.1",
+          "resource":"0x8030000000",
+          "size":"512.00 MiB (536.87 MB)",
+          "volatile_capable":true,
+          "nr_targets":2
+        },
+        {
+          "decoder":"decoder3.3",
+          "resource":"0x8060000000",
+          "size":"512.00 MiB (536.87 MB)",
+          "pmem_capable":true,
+          "nr_targets":2
+        },
+        {
+          "decoder":"decoder3.0",
+          "resource":"0x8020000000",
+          "size":"256.00 MiB (268.44 MB)",
+          "volatile_capable":true,
+          "nr_targets":1
+        },
+        {
+          "decoder":"decoder3.2",
+          "resource":"0x8050000000",
+          "size":"256.00 MiB (268.44 MB)",
+          "pmem_capable":true,
+          "nr_targets":1
+        }
+      ],
+      "memdevs:root3":[
+        {
+          "memdev":"mem3",
+          "pmem_size":"256.00 MiB (268.44 MB)",
+          "ram_size":"256.00 MiB (268.44 MB)",
+          "serial":"0x2",
+          "numa_node":0,
+          "host":"cxl_mem.2"
+        }
+      ]
+    }
+
+...which queries the CXL topology to ask "given the CXL Memory Expander with a
+kernel device name of 'mem3', in which platform-level decode ranges may this
+device participate?". A given expander can participate in multiple CXL.mem
+interleave sets simultaneously depending on how many decoder resources it has.
+In this example mem3 can participate in one or more of: a PMEM interleave that
+spans 2 Host Bridges, a PMEM interleave that targets a single Host Bridge, a
+Volatile memory interleave that spans 2 Host Bridges, and a Volatile memory
+interleave that only targets a single Host Bridge.
+
+Conversely, the memory devices that can participate in a given platform-level
+decode scheme can be determined via a command like the following::
+
+    # cxl list -MDu -d 3.2
+    [
+      {
+        "memdevs":[
+          {
+            "memdev":"mem1",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0",
+            "numa_node":0,
+            "host":"cxl_mem.0"
+          },
+          {
+            "memdev":"mem5",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0x4",
+            "numa_node":0,
+            "host":"cxl_mem.4"
+          },
+          {
+            "memdev":"mem7",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0x6",
+            "numa_node":0,
+            "host":"cxl_mem.6"
+          },
+          {
+            "memdev":"mem3",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0x2",
+            "numa_node":0,
+            "host":"cxl_mem.2"
+          }
+        ]
+      },
+      {
+        "root decoders":[
+          {
+            "decoder":"decoder3.2",
+            "resource":"0x8050000000",
+            "size":"256.00 MiB (268.44 MB)",
+            "pmem_capable":true,
+            "nr_targets":1
+          }
+        ]
+      }
+    ]
+
+...where the naming scheme for decoders is "decoder<port_id>.<instance_id>".
+
 Driver Infrastructure
 =====================
 
@@ -28,6 +325,14 @@ CXL Memory Device
 .. kernel-doc:: drivers/cxl/pci.c
    :internal:
 
+.. kernel-doc:: drivers/cxl/mem.c
+   :doc: cxl mem
+
+CXL Port
+--------
+.. kernel-doc:: drivers/cxl/port.c
+   :doc: cxl port
+
 CXL Core
 --------
 .. kernel-doc:: drivers/cxl/cxl.h
@@ -36,10 +341,16 @@ CXL Core
 .. kernel-doc:: drivers/cxl/cxl.h
    :internal:
 
-.. kernel-doc:: drivers/cxl/core/bus.c
+.. kernel-doc:: drivers/cxl/core/port.c
    :doc: cxl core
 
-.. kernel-doc:: drivers/cxl/core/bus.c
+.. kernel-doc:: drivers/cxl/core/port.c
+   :identifiers:
+
+.. kernel-doc:: drivers/cxl/core/pci.c
+   :doc: cxl core pci
+
+.. kernel-doc:: drivers/cxl/core/pci.c
    :identifiers:
 
 .. kernel-doc:: drivers/cxl/core/pmem.c
index 67c9137..b88ab95 100644 (file)
@@ -13,25 +13,26 @@ menuconfig CXL_BUS
 
 if CXL_BUS
 
-config CXL_MEM
-       tristate "CXL.mem: Memory Devices"
+config CXL_PCI
+       tristate "PCI manageability"
        default CXL_BUS
        help
-         The CXL.mem protocol allows a device to act as a provider of
-         "System RAM" and/or "Persistent Memory" that is fully coherent
-         as if the memory was attached to the typical CPU memory
-         controller.
+         The CXL specification defines a "CXL memory device" sub-class in the
+         PCI "memory controller" base class of devices. Devices identified by
+         this class code provide support for volatile and / or persistent
+         memory to be mapped into the system address map (Host-managed Device
+         Memory (HDM)).
 
-         Say 'y/m' to enable a driver that will attach to CXL.mem devices for
-         configuration and management primarily via the mailbox interface. See
-         Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification for more
-         details.
+         Say 'y/m' to enable a driver that will attach to CXL memory expander
+         devices enumerated by the memory device class code for configuration
+         and management primarily via the mailbox interface. See Chapter 2.3
+         Type 3 CXL Device in the CXL 2.0 specification for more details.
 
          If unsure say 'm'.
 
 config CXL_MEM_RAW_COMMANDS
        bool "RAW Command Interface for Memory Devices"
-       depends on CXL_MEM
+       depends on CXL_PCI
        help
          Enable CXL RAW command interface.
 
@@ -76,4 +77,25 @@ config CXL_PMEM
          provisioning the persistent memory capacity of CXL memory expanders.
 
          If unsure say 'm'.
+
+config CXL_MEM
+       tristate "CXL: Memory Expansion"
+       depends on CXL_PCI
+       default CXL_BUS
+       help
+         The CXL.mem protocol allows a device to act as a provider of "System
+         RAM" and/or "Persistent Memory" that is fully coherent as if the
+         memory were attached to the typical CPU memory controller. This is
+         known as HDM "Host-managed Device Memory".
+
+         Say 'y/m' to enable a driver that will attach to CXL.mem devices for
+         memory expansion and control of HDM. See Chapter 9.13 in the CXL 2.0
+         specification for a detailed description of HDM.
+
+         If unsure say 'm'.
+
+config CXL_PORT
+       default CXL_BUS
+       tristate
+
 endif
index d1aaabc..ce267ef 100644 (file)
@@ -1,9 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CXL_BUS) += core/
-obj-$(CONFIG_CXL_MEM) += cxl_pci.o
+obj-$(CONFIG_CXL_PCI) += cxl_pci.o
+obj-$(CONFIG_CXL_MEM) += cxl_mem.o
 obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
 obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
+obj-$(CONFIG_CXL_PORT) += cxl_port.o
 
+cxl_mem-y := mem.o
 cxl_pci-y := pci.o
 cxl_acpi-y := acpi.o
 cxl_pmem-y := pmem.o
+cxl_port-y := port.o
index 3163167..d15a6ae 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include "cxlpci.h"
 #include "cxl.h"
 
 /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
@@ -14,7 +15,7 @@
 
 static unsigned long cfmws_to_decoder_flags(int restrictions)
 {
-       unsigned long flags = 0;
+       unsigned long flags = CXL_DECODER_F_ENABLE;
 
        if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2)
                flags |= CXL_DECODER_F_TYPE2;
@@ -101,16 +102,14 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
        for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++)
                target_map[i] = cfmws->interleave_targets[i];
 
-       cxld = cxl_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws));
+       cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws));
        if (IS_ERR(cxld))
                return 0;
 
        cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
        cxld->target_type = CXL_DECODER_EXPANDER;
-       cxld->range = (struct range){
-               .start = cfmws->base_hpa,
-               .end = cfmws->base_hpa + cfmws->window_size - 1,
-       };
+       cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa,
+                                                            cfmws->window_size);
        cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws);
        cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws);
 
@@ -120,67 +119,17 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
        else
                rc = cxl_decoder_autoremove(dev, cxld);
        if (rc) {
-               dev_err(dev, "Failed to add decoder for %#llx-%#llx\n",
-                       cfmws->base_hpa,
-                       cfmws->base_hpa + cfmws->window_size - 1);
+               dev_err(dev, "Failed to add decoder for %pr\n",
+                       &cxld->platform_res);
                return 0;
        }
-       dev_dbg(dev, "add: %s node: %d range %#llx-%#llx\n",
-               dev_name(&cxld->dev), phys_to_target_node(cxld->range.start),
-               cfmws->base_hpa, cfmws->base_hpa + cfmws->window_size - 1);
+       dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev),
+               phys_to_target_node(cxld->platform_res.start),
+               &cxld->platform_res);
 
        return 0;
 }
 
-__mock int match_add_root_ports(struct pci_dev *pdev, void *data)
-{
-       struct cxl_walk_context *ctx = data;
-       struct pci_bus *root_bus = ctx->root;
-       struct cxl_port *port = ctx->port;
-       int type = pci_pcie_type(pdev);
-       struct device *dev = ctx->dev;
-       u32 lnkcap, port_num;
-       int rc;
-
-       if (pdev->bus != root_bus)
-               return 0;
-       if (!pci_is_pcie(pdev))
-               return 0;
-       if (type != PCI_EXP_TYPE_ROOT_PORT)
-               return 0;
-       if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
-                                 &lnkcap) != PCIBIOS_SUCCESSFUL)
-               return 0;
-
-       /* TODO walk DVSEC to find component register base */
-       port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-       rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE);
-       if (rc) {
-               ctx->error = rc;
-               return rc;
-       }
-       ctx->count++;
-
-       dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
-
-       return 0;
-}
-
-static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev)
-{
-       struct cxl_dport *dport;
-
-       device_lock(&port->dev);
-       list_for_each_entry(dport, &port->dports, list)
-               if (dport->dport == dev) {
-                       device_unlock(&port->dev);
-                       return dport;
-               }
-
-       device_unlock(&port->dev);
-       return NULL;
-}
-
 __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
                                              struct device *dev)
 {
@@ -204,83 +153,35 @@ static int add_host_bridge_uport(struct device *match, void *arg)
        struct device *host = root_port->dev.parent;
        struct acpi_device *bridge = to_cxl_host_bridge(host, match);
        struct acpi_pci_root *pci_root;
-       struct cxl_walk_context ctx;
-       int single_port_map[1], rc;
-       struct cxl_decoder *cxld;
        struct cxl_dport *dport;
        struct cxl_port *port;
+       int rc;
 
        if (!bridge)
                return 0;
 
-       dport = find_dport_by_dev(root_port, match);
+       dport = cxl_find_dport_by_dev(root_port, match);
        if (!dport) {
                dev_dbg(host, "host bridge expected and not found\n");
                return 0;
        }
 
-       port = devm_cxl_add_port(host, match, dport->component_reg_phys,
-                                root_port);
-       if (IS_ERR(port))
-               return PTR_ERR(port);
-       dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
-
        /*
         * Note that this lookup already succeeded in
         * to_cxl_host_bridge(), so no need to check for failure here
         */
        pci_root = acpi_pci_find_root(bridge->handle);
-       ctx = (struct cxl_walk_context){
-               .dev = host,
-               .root = pci_root->bus,
-               .port = port,
-       };
-       pci_walk_bus(pci_root->bus, match_add_root_ports, &ctx);
-
-       if (ctx.count == 0)
-               return -ENODEV;
-       if (ctx.error)
-               return ctx.error;
-       if (ctx.count > 1)
-               return 0;
-
-       /* TODO: Scan CHBCR for HDM Decoder resources */
-
-       /*
-        * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability
-        * Structure) single ported host-bridges need not publish a decoder
-        * capability when a passthrough decode can be assumed, i.e. all
-        * transactions that the uport sees are claimed and passed to the single
-        * dport. Disable the range until the first CXL region is enumerated /
-        * activated.
-        */
-       cxld = cxl_decoder_alloc(port, 1);
-       if (IS_ERR(cxld))
-               return PTR_ERR(cxld);
-
-       cxld->interleave_ways = 1;
-       cxld->interleave_granularity = PAGE_SIZE;
-       cxld->target_type = CXL_DECODER_EXPANDER;
-       cxld->range = (struct range) {
-               .start = 0,
-               .end = -1,
-       };
-
-       device_lock(&port->dev);
-       dport = list_first_entry(&port->dports, typeof(*dport), list);
-       device_unlock(&port->dev);
-
-       single_port_map[0] = dport->port_id;
-
-       rc = cxl_decoder_add(cxld, single_port_map);
+       rc = devm_cxl_register_pci_bus(host, match, pci_root->bus);
        if (rc)
-               put_device(&cxld->dev);
-       else
-               rc = cxl_decoder_autoremove(host, cxld);
+               return rc;
+
+       port = devm_cxl_add_port(host, match, dport->component_reg_phys,
+                                root_port);
+       if (IS_ERR(port))
+               return PTR_ERR(port);
+       dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
 
-       if (rc == 0)
-               dev_dbg(host, "add: %s\n", dev_name(&cxld->dev));
-       return rc;
+       return 0;
 }
 
 struct cxl_chbs_context {
@@ -309,9 +210,9 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
 
 static int add_host_bridge_dport(struct device *match, void *arg)
 {
-       int rc;
        acpi_status status;
        unsigned long long uid;
+       struct cxl_dport *dport;
        struct cxl_chbs_context ctx;
        struct cxl_port *root_port = arg;
        struct device *host = root_port->dev.parent;
@@ -340,11 +241,11 @@ static int add_host_bridge_dport(struct device *match, void *arg)
                return 0;
        }
 
-       rc = cxl_add_dport(root_port, match, uid, ctx.chbcr);
-       if (rc) {
+       dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr);
+       if (IS_ERR(dport)) {
                dev_err(host, "failed to add downstream port: %s\n",
                        dev_name(match));
-               return rc;
+               return PTR_ERR(dport);
        }
        dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match));
        return 0;
@@ -413,7 +314,8 @@ static int cxl_acpi_probe(struct platform_device *pdev)
        if (rc < 0)
                return rc;
 
-       return 0;
+       /* In case PCI is scanned before ACPI, re-trigger memdev attach */
+       return cxl_bus_rescan();
 }
 
 static const struct acpi_device_id cxl_acpi_ids[] = {
index 40ab503..6d37cd7 100644 (file)
@@ -2,8 +2,10 @@
 obj-$(CONFIG_CXL_BUS) += cxl_core.o
 
 ccflags-y += -I$(srctree)/drivers/cxl
-cxl_core-y := bus.o
+cxl_core-y := port.o
 cxl_core-y += pmem.o
 cxl_core-y += regs.o
 cxl_core-y += memdev.o
 cxl_core-y += mbox.o
+cxl_core-y += pci.o
+cxl_core-y += hdm.o
diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
deleted file mode 100644 (file)
index 3f9b98e..0000000
+++ /dev/null
@@ -1,675 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/idr.h>
-#include <cxlmem.h>
-#include <cxl.h>
-#include "core.h"
-
-/**
- * DOC: cxl core
- *
- * The CXL core provides a set of interfaces that can be consumed by CXL aware
- * drivers. The interfaces allow for creation, modification, and destruction of
- * regions, memory devices, ports, and decoders. CXL aware drivers must register
- * with the CXL core via these interfaces in order to be able to participate in
- * cross-device interleave coordination. The CXL core also establishes and
- * maintains the bridge to the nvdimm subsystem.
- *
- * CXL core introduces sysfs hierarchy to control the devices that are
- * instantiated by the core.
- */
-
-static DEFINE_IDA(cxl_port_ida);
-
-static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
-                           char *buf)
-{
-       return sysfs_emit(buf, "%s\n", dev->type->name);
-}
-static DEVICE_ATTR_RO(devtype);
-
-static struct attribute *cxl_base_attributes[] = {
-       &dev_attr_devtype.attr,
-       NULL,
-};
-
-struct attribute_group cxl_base_attribute_group = {
-       .attrs = cxl_base_attributes,
-};
-
-static ssize_t start_show(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-       return sysfs_emit(buf, "%#llx\n", cxld->range.start);
-}
-static DEVICE_ATTR_RO(start);
-
-static ssize_t size_show(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-       return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range));
-}
-static DEVICE_ATTR_RO(size);
-
-#define CXL_DECODER_FLAG_ATTR(name, flag)                            \
-static ssize_t name##_show(struct device *dev,                       \
-                          struct device_attribute *attr, char *buf) \
-{                                                                    \
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);              \
-                                                                     \
-       return sysfs_emit(buf, "%s\n",                               \
-                         (cxld->flags & (flag)) ? "1" : "0");       \
-}                                                                    \
-static DEVICE_ATTR_RO(name)
-
-CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
-CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
-CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
-CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
-CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
-
-static ssize_t target_type_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-       switch (cxld->target_type) {
-       case CXL_DECODER_ACCELERATOR:
-               return sysfs_emit(buf, "accelerator\n");
-       case CXL_DECODER_EXPANDER:
-               return sysfs_emit(buf, "expander\n");
-       }
-       return -ENXIO;
-}
-static DEVICE_ATTR_RO(target_type);
-
-static ssize_t target_list_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-       ssize_t offset = 0;
-       int i, rc = 0;
-
-       device_lock(dev);
-       for (i = 0; i < cxld->interleave_ways; i++) {
-               struct cxl_dport *dport = cxld->target[i];
-               struct cxl_dport *next = NULL;
-
-               if (!dport)
-                       break;
-
-               if (i + 1 < cxld->interleave_ways)
-                       next = cxld->target[i + 1];
-               rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
-                                  next ? "," : "");
-               if (rc < 0)
-                       break;
-               offset += rc;
-       }
-       device_unlock(dev);
-
-       if (rc < 0)
-               return rc;
-
-       rc = sysfs_emit_at(buf, offset, "\n");
-       if (rc < 0)
-               return rc;
-
-       return offset + rc;
-}
-static DEVICE_ATTR_RO(target_list);
-
-static struct attribute *cxl_decoder_base_attrs[] = {
-       &dev_attr_start.attr,
-       &dev_attr_size.attr,
-       &dev_attr_locked.attr,
-       &dev_attr_target_list.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_decoder_base_attribute_group = {
-       .attrs = cxl_decoder_base_attrs,
-};
-
-static struct attribute *cxl_decoder_root_attrs[] = {
-       &dev_attr_cap_pmem.attr,
-       &dev_attr_cap_ram.attr,
-       &dev_attr_cap_type2.attr,
-       &dev_attr_cap_type3.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_decoder_root_attribute_group = {
-       .attrs = cxl_decoder_root_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
-       &cxl_decoder_root_attribute_group,
-       &cxl_decoder_base_attribute_group,
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static struct attribute *cxl_decoder_switch_attrs[] = {
-       &dev_attr_target_type.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_decoder_switch_attribute_group = {
-       .attrs = cxl_decoder_switch_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
-       &cxl_decoder_switch_attribute_group,
-       &cxl_decoder_base_attribute_group,
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static void cxl_decoder_release(struct device *dev)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-       struct cxl_port *port = to_cxl_port(dev->parent);
-
-       ida_free(&port->decoder_ida, cxld->id);
-       kfree(cxld);
-}
-
-static const struct device_type cxl_decoder_switch_type = {
-       .name = "cxl_decoder_switch",
-       .release = cxl_decoder_release,
-       .groups = cxl_decoder_switch_attribute_groups,
-};
-
-static const struct device_type cxl_decoder_root_type = {
-       .name = "cxl_decoder_root",
-       .release = cxl_decoder_release,
-       .groups = cxl_decoder_root_attribute_groups,
-};
-
-bool is_root_decoder(struct device *dev)
-{
-       return dev->type == &cxl_decoder_root_type;
-}
-EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL);
-
-struct cxl_decoder *to_cxl_decoder(struct device *dev)
-{
-       if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release,
-                         "not a cxl_decoder device\n"))
-               return NULL;
-       return container_of(dev, struct cxl_decoder, dev);
-}
-EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL);
-
-static void cxl_dport_release(struct cxl_dport *dport)
-{
-       list_del(&dport->list);
-       put_device(dport->dport);
-       kfree(dport);
-}
-
-static void cxl_port_release(struct device *dev)
-{
-       struct cxl_port *port = to_cxl_port(dev);
-       struct cxl_dport *dport, *_d;
-
-       device_lock(dev);
-       list_for_each_entry_safe(dport, _d, &port->dports, list)
-               cxl_dport_release(dport);
-       device_unlock(dev);
-       ida_free(&cxl_port_ida, port->id);
-       kfree(port);
-}
-
-static const struct attribute_group *cxl_port_attribute_groups[] = {
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static const struct device_type cxl_port_type = {
-       .name = "cxl_port",
-       .release = cxl_port_release,
-       .groups = cxl_port_attribute_groups,
-};
-
-struct cxl_port *to_cxl_port(struct device *dev)
-{
-       if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
-                         "not a cxl_port device\n"))
-               return NULL;
-       return container_of(dev, struct cxl_port, dev);
-}
-
-static void unregister_port(void *_port)
-{
-       struct cxl_port *port = _port;
-       struct cxl_dport *dport;
-
-       device_lock(&port->dev);
-       list_for_each_entry(dport, &port->dports, list) {
-               char link_name[CXL_TARGET_STRLEN];
-
-               if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d",
-                            dport->port_id) >= CXL_TARGET_STRLEN)
-                       continue;
-               sysfs_remove_link(&port->dev.kobj, link_name);
-       }
-       device_unlock(&port->dev);
-       device_unregister(&port->dev);
-}
-
-static void cxl_unlink_uport(void *_port)
-{
-       struct cxl_port *port = _port;
-
-       sysfs_remove_link(&port->dev.kobj, "uport");
-}
-
-static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
-{
-       int rc;
-
-       rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
-       if (rc)
-               return rc;
-       return devm_add_action_or_reset(host, cxl_unlink_uport, port);
-}
-
-static struct cxl_port *cxl_port_alloc(struct device *uport,
-                                      resource_size_t component_reg_phys,
-                                      struct cxl_port *parent_port)
-{
-       struct cxl_port *port;
-       struct device *dev;
-       int rc;
-
-       port = kzalloc(sizeof(*port), GFP_KERNEL);
-       if (!port)
-               return ERR_PTR(-ENOMEM);
-
-       rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
-       if (rc < 0)
-               goto err;
-       port->id = rc;
-
-       /*
-        * The top-level cxl_port "cxl_root" does not have a cxl_port as
-        * its parent and it does not have any corresponding component
-        * registers as its decode is described by a fixed platform
-        * description.
-        */
-       dev = &port->dev;
-       if (parent_port)
-               dev->parent = &parent_port->dev;
-       else
-               dev->parent = uport;
-
-       port->uport = uport;
-       port->component_reg_phys = component_reg_phys;
-       ida_init(&port->decoder_ida);
-       INIT_LIST_HEAD(&port->dports);
-
-       device_initialize(dev);
-       device_set_pm_not_required(dev);
-       dev->bus = &cxl_bus_type;
-       dev->type = &cxl_port_type;
-
-       return port;
-
-err:
-       kfree(port);
-       return ERR_PTR(rc);
-}
-
-/**
- * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
- * @host: host device for devm operations
- * @uport: "physical" device implementing this upstream port
- * @component_reg_phys: (optional) for configurable cxl_port instances
- * @parent_port: next hop up in the CXL memory decode hierarchy
- */
-struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
-                                  resource_size_t component_reg_phys,
-                                  struct cxl_port *parent_port)
-{
-       struct cxl_port *port;
-       struct device *dev;
-       int rc;
-
-       port = cxl_port_alloc(uport, component_reg_phys, parent_port);
-       if (IS_ERR(port))
-               return port;
-
-       dev = &port->dev;
-       if (parent_port)
-               rc = dev_set_name(dev, "port%d", port->id);
-       else
-               rc = dev_set_name(dev, "root%d", port->id);
-       if (rc)
-               goto err;
-
-       rc = device_add(dev);
-       if (rc)
-               goto err;
-
-       rc = devm_add_action_or_reset(host, unregister_port, port);
-       if (rc)
-               return ERR_PTR(rc);
-
-       rc = devm_cxl_link_uport(host, port);
-       if (rc)
-               return ERR_PTR(rc);
-
-       return port;
-
-err:
-       put_device(dev);
-       return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
-
-static struct cxl_dport *find_dport(struct cxl_port *port, int id)
-{
-       struct cxl_dport *dport;
-
-       device_lock_assert(&port->dev);
-       list_for_each_entry (dport, &port->dports, list)
-               if (dport->port_id == id)
-                       return dport;
-       return NULL;
-}
-
-static int add_dport(struct cxl_port *port, struct cxl_dport *new)
-{
-       struct cxl_dport *dup;
-
-       device_lock(&port->dev);
-       dup = find_dport(port, new->port_id);
-       if (dup)
-               dev_err(&port->dev,
-                       "unable to add dport%d-%s non-unique port id (%s)\n",
-                       new->port_id, dev_name(new->dport),
-                       dev_name(dup->dport));
-       else
-               list_add_tail(&new->list, &port->dports);
-       device_unlock(&port->dev);
-
-       return dup ? -EEXIST : 0;
-}
-
-/**
- * cxl_add_dport - append downstream port data to a cxl_port
- * @port: the cxl_port that references this dport
- * @dport_dev: firmware or PCI device representing the dport
- * @port_id: identifier for this dport in a decoder's target list
- * @component_reg_phys: optional location of CXL component registers
- *
- * Note that all allocations and links are undone by cxl_port deletion
- * and release.
- */
-int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id,
-                 resource_size_t component_reg_phys)
-{
-       char link_name[CXL_TARGET_STRLEN];
-       struct cxl_dport *dport;
-       int rc;
-
-       if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
-           CXL_TARGET_STRLEN)
-               return -EINVAL;
-
-       dport = kzalloc(sizeof(*dport), GFP_KERNEL);
-       if (!dport)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&dport->list);
-       dport->dport = get_device(dport_dev);
-       dport->port_id = port_id;
-       dport->component_reg_phys = component_reg_phys;
-       dport->port = port;
-
-       rc = add_dport(port, dport);
-       if (rc)
-               goto err;
-
-       rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
-       if (rc)
-               goto err;
-
-       return 0;
-err:
-       cxl_dport_release(dport);
-       return rc;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_add_dport, CXL);
-
-static int decoder_populate_targets(struct cxl_decoder *cxld,
-                                   struct cxl_port *port, int *target_map)
-{
-       int rc = 0, i;
-
-       if (!target_map)
-               return 0;
-
-       device_lock(&port->dev);
-       if (list_empty(&port->dports)) {
-               rc = -EINVAL;
-               goto out_unlock;
-       }
-
-       for (i = 0; i < cxld->nr_targets; i++) {
-               struct cxl_dport *dport = find_dport(port, target_map[i]);
-
-               if (!dport) {
-                       rc = -ENXIO;
-                       goto out_unlock;
-               }
-               cxld->target[i] = dport;
-       }
-
-out_unlock:
-       device_unlock(&port->dev);
-
-       return rc;
-}
-
-struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets)
-{
-       struct cxl_decoder *cxld;
-       struct device *dev;
-       int rc = 0;
-
-       if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1)
-               return ERR_PTR(-EINVAL);
-
-       cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
-       if (!cxld)
-               return ERR_PTR(-ENOMEM);
-
-       rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
-       if (rc < 0)
-               goto err;
-
-       cxld->id = rc;
-       cxld->nr_targets = nr_targets;
-       dev = &cxld->dev;
-       device_initialize(dev);
-       device_set_pm_not_required(dev);
-       dev->parent = &port->dev;
-       dev->bus = &cxl_bus_type;
-
-       /* root ports do not have a cxl_port_type parent */
-       if (port->dev.parent->type == &cxl_port_type)
-               dev->type = &cxl_decoder_switch_type;
-       else
-               dev->type = &cxl_decoder_root_type;
-
-       return cxld;
-err:
-       kfree(cxld);
-       return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_decoder_alloc, CXL);
-
-int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
-{
-       struct cxl_port *port;
-       struct device *dev;
-       int rc;
-
-       if (WARN_ON_ONCE(!cxld))
-               return -EINVAL;
-
-       if (WARN_ON_ONCE(IS_ERR(cxld)))
-               return PTR_ERR(cxld);
-
-       if (cxld->interleave_ways < 1)
-               return -EINVAL;
-
-       port = to_cxl_port(cxld->dev.parent);
-       rc = decoder_populate_targets(cxld, port, target_map);
-       if (rc)
-               return rc;
-
-       dev = &cxld->dev;
-       rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
-       if (rc)
-               return rc;
-
-       return device_add(dev);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
-
-static void cxld_unregister(void *dev)
-{
-       device_unregister(dev);
-}
-
-int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
-{
-       return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL);
-
-/**
- * __cxl_driver_register - register a driver for the cxl bus
- * @cxl_drv: cxl driver structure to attach
- * @owner: owning module/driver
- * @modname: KBUILD_MODNAME for parent driver
- */
-int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
-                         const char *modname)
-{
-       if (!cxl_drv->probe) {
-               pr_debug("%s ->probe() must be specified\n", modname);
-               return -EINVAL;
-       }
-
-       if (!cxl_drv->name) {
-               pr_debug("%s ->name must be specified\n", modname);
-               return -EINVAL;
-       }
-
-       if (!cxl_drv->id) {
-               pr_debug("%s ->id must be specified\n", modname);
-               return -EINVAL;
-       }
-
-       cxl_drv->drv.bus = &cxl_bus_type;
-       cxl_drv->drv.owner = owner;
-       cxl_drv->drv.mod_name = modname;
-       cxl_drv->drv.name = cxl_drv->name;
-
-       return driver_register(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL);
-
-void cxl_driver_unregister(struct cxl_driver *cxl_drv)
-{
-       driver_unregister(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL);
-
-static int cxl_device_id(struct device *dev)
-{
-       if (dev->type == &cxl_nvdimm_bridge_type)
-               return CXL_DEVICE_NVDIMM_BRIDGE;
-       if (dev->type == &cxl_nvdimm_type)
-               return CXL_DEVICE_NVDIMM;
-       return 0;
-}
-
-static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-       return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
-                             cxl_device_id(dev));
-}
-
-static int cxl_bus_match(struct device *dev, struct device_driver *drv)
-{
-       return cxl_device_id(dev) == to_cxl_drv(drv)->id;
-}
-
-static int cxl_bus_probe(struct device *dev)
-{
-       return to_cxl_drv(dev->driver)->probe(dev);
-}
-
-static void cxl_bus_remove(struct device *dev)
-{
-       struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
-
-       if (cxl_drv->remove)
-               cxl_drv->remove(dev);
-}
-
-struct bus_type cxl_bus_type = {
-       .name = "cxl",
-       .uevent = cxl_bus_uevent,
-       .match = cxl_bus_match,
-       .probe = cxl_bus_probe,
-       .remove = cxl_bus_remove,
-};
-EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);
-
-static __init int cxl_core_init(void)
-{
-       int rc;
-
-       cxl_mbox_init();
-
-       rc = cxl_memdev_init();
-       if (rc)
-               return rc;
-
-       rc = bus_register(&cxl_bus_type);
-       if (rc)
-               goto err;
-       return 0;
-
-err:
-       cxl_memdev_exit();
-       cxl_mbox_exit();
-       return rc;
-}
-
-static void cxl_core_exit(void)
-{
-       bus_unregister(&cxl_bus_type);
-       cxl_memdev_exit();
-       cxl_mbox_exit();
-}
-
-module_init(cxl_core_init);
-module_exit(cxl_core_exit);
-MODULE_LICENSE("GPL v2");
index e0c9aac..1a50c0f 100644 (file)
@@ -14,6 +14,8 @@ struct cxl_mem_query_commands;
 int cxl_query_cmd(struct cxl_memdev *cxlmd,
                  struct cxl_mem_query_commands __user *q);
 int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+                                  resource_size_t length);
 
 int cxl_memdev_init(void);
 void cxl_memdev_exit(void);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
new file mode 100644 (file)
index 0000000..0e89a7a
--- /dev/null
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+
+#include "cxlmem.h"
+#include "core.h"
+
+/**
+ * DOC: cxl core hdm
+ *
+ * Compute Express Link Host Managed Device Memory, starting with the
+ * CXL 2.0 specification, is managed by an array of HDM Decoder register
+ * instances per CXL port and per CXL endpoint. Define common helpers
+ * for enumerating these registers and capabilities.
+ */
+
+static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
+                          int *target_map)
+{
+       int rc;
+
+       rc = cxl_decoder_add_locked(cxld, target_map);
+       if (rc) {
+               put_device(&cxld->dev);
+               dev_err(&port->dev, "Failed to add decoder\n");
+               return rc;
+       }
+
+       rc = cxl_decoder_autoremove(&port->dev, cxld);
+       if (rc)
+               return rc;
+
+       dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+
+       return 0;
+}
+
+/*
+ * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure)
+ * single ported host-bridges need not publish a decoder capability when a
+ * passthrough decode can be assumed, i.e. all transactions that the uport sees
+ * are claimed and passed to the single dport. Disable the range until the first
+ * CXL region is enumerated / activated.
+ */
+int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+{
+       struct cxl_decoder *cxld;
+       struct cxl_dport *dport;
+       int single_port_map[1];
+
+       cxld = cxl_switch_decoder_alloc(port, 1);
+       if (IS_ERR(cxld))
+               return PTR_ERR(cxld);
+
+       device_lock_assert(&port->dev);
+
+       dport = list_first_entry(&port->dports, typeof(*dport), list);
+       single_port_map[0] = dport->port_id;
+
+       return add_hdm_decoder(port, cxld, single_port_map);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, CXL);
+
+static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
+{
+       u32 hdm_cap;
+
+       hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET);
+       cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap);
+       cxlhdm->target_count =
+               FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap);
+       if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap))
+               cxlhdm->interleave_mask |= GENMASK(11, 8);
+       if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
+               cxlhdm->interleave_mask |= GENMASK(14, 12);
+}
+
+static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
+                                         void __iomem *crb)
+{
+       struct cxl_component_reg_map map;
+
+       cxl_probe_component_regs(&port->dev, crb, &map);
+       if (!map.hdm_decoder.valid) {
+               dev_err(&port->dev, "HDM decoder registers invalid\n");
+               return IOMEM_ERR_PTR(-ENXIO);
+       }
+
+       return crb + map.hdm_decoder.offset;
+}
+
+/**
+ * devm_cxl_setup_hdm - map HDM decoder component registers
+ * @port: cxl_port to map
+ */
+struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port)
+{
+       struct device *dev = &port->dev;
+       void __iomem *crb, *hdm;
+       struct cxl_hdm *cxlhdm;
+
+       cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
+       if (!cxlhdm)
+               return ERR_PTR(-ENOMEM);
+
+       cxlhdm->port = port;
+       crb = devm_cxl_iomap_block(dev, port->component_reg_phys,
+                                  CXL_COMPONENT_REG_BLOCK_SIZE);
+       if (!crb) {
+               dev_err(dev, "No component registers mapped\n");
+               return ERR_PTR(-ENXIO);
+       }
+
+       hdm = map_hdm_decoder_regs(port, crb);
+       if (IS_ERR(hdm))
+               return ERR_CAST(hdm);
+       cxlhdm->regs.hdm_decoder = hdm;
+
+       parse_hdm_decoder_caps(cxlhdm);
+       if (cxlhdm->decoder_count == 0) {
+               dev_err(dev, "Spec violation. Caps invalid\n");
+               return ERR_PTR(-ENXIO);
+       }
+
+       return cxlhdm;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL);
+
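+/*
+ * The HDM Decoder Control register encodes interleave granularity as a
+ * power-of-2 exponent relative to 256 bytes: 0 -> 256B, 1 -> 512B, and so
+ * on (CXL 2.0 8.2.5.12.7).
+ */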
+static int to_interleave_granularity(u32 ctrl)
+{
+       int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl);
+
+       return 256 << val;
+}
+
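+/*
+ * Interleave ways encodings: 0..4 select power-of-2 ways (1, 2, 4, 8, 16),
+ * 8..10 select 3-way multiples (3, 6, 12), all other values are reserved.
+ */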
+static int to_interleave_ways(u32 ctrl)
+{
+       int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl);
+
+       switch (val) {
+       case 0 ... 4:
+               return 1 << val;
+       case 8 ... 10:
+               return 3 << (val - 8);
+       default:
+               return 0;
+       }
+}
+
+static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
+                           int *target_map, void __iomem *hdm, int which)
+{
+       u64 size, base;
+       u32 ctrl;
+       int i;
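+       /* the Target List register packs one 8-bit port id per byte */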
+       union {
+               u64 value;
+               unsigned char target_id[8];
+       } target_list;
+
+       ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
+       base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
+       size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
+
+       if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED))
+               size = 0;
+       if (base == U64_MAX || size == U64_MAX) {
+               dev_warn(&port->dev, "decoder%d.%d: Invalid resource range\n",
+                        port->id, cxld->id);
+               return -ENXIO;
+       }
+
+       cxld->decoder_range = (struct range) {
+               .start = base,
+               .end = base + size - 1,
+       };
+
+       /* switch decoders are always enabled if committed */
+       if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) {
+               cxld->flags |= CXL_DECODER_F_ENABLE;
+               if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
+                       cxld->flags |= CXL_DECODER_F_LOCK;
+       }
+       cxld->interleave_ways = to_interleave_ways(ctrl);
+       if (!cxld->interleave_ways) {
+               dev_warn(&port->dev,
+                        "decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n",
+                        port->id, cxld->id, ctrl);
+               return -ENXIO;
+       }
+       cxld->interleave_granularity = to_interleave_granularity(ctrl);
+
+       if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl))
+               cxld->target_type = CXL_DECODER_EXPANDER;
+       else
+               cxld->target_type = CXL_DECODER_ACCELERATOR;
+
+       if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent)))
+               return 0;
+
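+       /*
+        * Each byte of the 64-bit target list register carries the port
+        * id of the dport backing the corresponding interleave way.
+        */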
+       target_list.value =
+               ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
+       for (i = 0; i < cxld->interleave_ways; i++)
+               target_map[i] = target_list.target_id[i];
+
+       return 0;
+}
+
+/**
+ * devm_cxl_enumerate_decoders - add decoder objects per HDM register set
+ * @cxlhdm: HDM decoder capability context returned by devm_cxl_setup_hdm()
+ */
+int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
+{
+       void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+       struct cxl_port *port = cxlhdm->port;
+       int i, committed, failed;
+       u32 ctrl;
+
+       /*
+        * Since the register resource was recently claimed via request_region(),
+        * be careful about trusting the "not-committed" status until the commit
+        * timeout has elapsed.  The commit timeout is 10ms (CXL 2.0
+        * 8.2.5.12.20), but double it to be tolerant of any clock skew between
+        * host and target.
+        */
+       for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) {
+               ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+               if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)
+                       committed++;
+       }
+
+       /* ensure that future checks of committed can be trusted */
+       if (committed != cxlhdm->decoder_count)
+               msleep(20);
+
+       for (i = 0, failed = 0; i < cxlhdm->decoder_count; i++) {
+               int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
+               int rc, target_count = cxlhdm->target_count;
+               struct cxl_decoder *cxld;
+
+               if (is_cxl_endpoint(port))
+                       cxld = cxl_endpoint_decoder_alloc(port);
+               else
+                       cxld = cxl_switch_decoder_alloc(port, target_count);
+               if (IS_ERR(cxld)) {
+                       dev_warn(&port->dev,
+                                "Failed to allocate the decoder\n");
+                       return PTR_ERR(cxld);
+               }
+
+               rc = init_hdm_decoder(port, cxld, target_map,
+                                     cxlhdm->regs.hdm_decoder, i);
+               if (rc) {
+                       put_device(&cxld->dev);
+                       failed++;
+                       continue;
+               }
+               rc = add_hdm_decoder(port, cxld, target_map);
+               if (rc) {
+                       dev_warn(&port->dev,
+                                "Failed to add decoder to port\n");
+                       return rc;
+               }
+       }
+
+       if (failed == cxlhdm->decoder_count) {
+               dev_err(&port->dev, "No valid decoders found\n");
+               return -ENXIO;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, CXL);
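+
+/*
+ * Illustrative sketch (not part of this change): a port driver is
+ * expected to pair the two helpers above, mapping the HDM decoder
+ * capability and then instantiating one decoder object per register
+ * set; 'example_port_probe' is a placeholder name:
+ */
+#if 0
+static int example_port_probe(struct device *dev)
+{
+       struct cxl_port *port = to_cxl_port(dev);
+       struct cxl_hdm *cxlhdm;
+
+       cxlhdm = devm_cxl_setup_hdm(port);
+       if (IS_ERR(cxlhdm))
+               return PTR_ERR(cxlhdm);
+
+       return devm_cxl_enumerate_decoders(cxlhdm);
+}
+#endif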
index 61029cb..1f76b28 100644 (file)
@@ -89,10 +89,29 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
 static struct device_attribute dev_attr_pmem_size =
        __ATTR(size, 0444, pmem_size_show, NULL);
 
+static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
+                          char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+       return sysfs_emit(buf, "%#llx\n", cxlds->serial);
+}
+static DEVICE_ATTR_RO(serial);
+
+static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
+                             char *buf)
+{
+       return sysfs_emit(buf, "%d\n", dev_to_node(dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
 static struct attribute *cxl_memdev_attributes[] = {
+       &dev_attr_serial.attr,
        &dev_attr_firmware_version.attr,
        &dev_attr_payload_max.attr,
        &dev_attr_label_storage_size.attr,
+       &dev_attr_numa_node.attr,
        NULL,
 };
 
@@ -106,8 +125,17 @@ static struct attribute *cxl_memdev_ram_attributes[] = {
        NULL,
 };
 
+static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
+                                 int n)
+{
+       if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
+               return 0;
+       return a->mode;
+}
+
 static struct attribute_group cxl_memdev_attribute_group = {
        .attrs = cxl_memdev_attributes,
+       .is_visible = cxl_memdev_visible,
 };
 
 static struct attribute_group cxl_memdev_ram_attribute_group = {
@@ -134,6 +162,12 @@ static const struct device_type cxl_memdev_type = {
        .groups = cxl_memdev_attribute_groups,
 };
 
+bool is_cxl_memdev(struct device *dev)
+{
+       return dev->type == &cxl_memdev_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
+
 /**
  * set_exclusive_cxl_commands() - atomically disable user cxl commands
  * @cxlds: The device state to operate on
@@ -185,6 +219,15 @@ static void cxl_memdev_unregister(void *_cxlmd)
        put_device(dev);
 }
 
+static void detach_memdev(struct work_struct *work)
+{
+       struct cxl_memdev *cxlmd;
+
+       cxlmd = container_of(work, typeof(*cxlmd), detach_work);
+       device_release_driver(&cxlmd->dev);
+       put_device(&cxlmd->dev);
+}
+
 static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
                                           const struct file_operations *fops)
 {
@@ -209,6 +252,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
        dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
        dev->type = &cxl_memdev_type;
        device_set_pm_not_required(dev);
+       INIT_WORK(&cxlmd->detach_work, detach_memdev);
 
        cdev = &cxlmd->cdev;
        cdev_init(cdev, fops);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
new file mode 100644 (file)
index 0000000..c9a494d
--- /dev/null
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <cxlpci.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core pci
+ *
+ * Compute Express Link protocols are layered on top of PCIe. The CXL core
+ * provides a set of helpers for CXL interactions that occur over PCIe.
+ */
+
+struct cxl_walk_context {
+       struct pci_bus *bus;
+       struct cxl_port *port;
+       int type;
+       int error;
+       int count;
+};
+
+static int match_add_dports(struct pci_dev *pdev, void *data)
+{
+       struct cxl_walk_context *ctx = data;
+       struct cxl_port *port = ctx->port;
+       int type = pci_pcie_type(pdev);
+       struct cxl_register_map map;
+       struct cxl_dport *dport;
+       u32 lnkcap, port_num;
+       int rc;
+
+       if (pdev->bus != ctx->bus)
+               return 0;
+       if (!pci_is_pcie(pdev))
+               return 0;
+       if (type != ctx->type)
+               return 0;
+       if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
+                                 &lnkcap))
+               return 0;
+
+       rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+       if (rc)
+               dev_dbg(&port->dev, "failed to find component registers\n");
+
+       port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
+       dport = devm_cxl_add_dport(port, &pdev->dev, port_num,
+                                  cxl_regmap_to_base(pdev, &map));
+       if (IS_ERR(dport)) {
+               ctx->error = PTR_ERR(dport);
+               return PTR_ERR(dport);
+       }
+       ctx->count++;
+
+       dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
+
+       return 0;
+}
+
+/**
+ * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
+ * @port: cxl_port whose ->uport is the upstream of dports to be enumerated
+ *
+ * Returns a positive number of dports enumerated or a negative error
+ * code.
+ */
+int devm_cxl_port_enumerate_dports(struct cxl_port *port)
+{
+       struct pci_bus *bus = cxl_port_to_pci_bus(port);
+       struct cxl_walk_context ctx;
+       int type;
+
+       if (!bus)
+               return -ENXIO;
+
+       if (pci_is_root_bus(bus))
+               type = PCI_EXP_TYPE_ROOT_PORT;
+       else
+               type = PCI_EXP_TYPE_DOWNSTREAM;
+
+       ctx = (struct cxl_walk_context) {
+               .port = port,
+               .bus = bus,
+               .type = type,
+       };
+       pci_walk_bus(bus, match_add_dports, &ctx);
+
+       if (ctx.count == 0)
+               return -ENODEV;
+       if (ctx.error)
+               return ctx.error;
+       return ctx.count;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
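+
+/*
+ * Illustrative fragment (not part of this change): a switch-port
+ * driver enumerates its dports before programming HDM decode, e.g.:
+ *
+ *     rc = devm_cxl_port_enumerate_dports(port);
+ *     if (rc < 0)
+ *             return rc;
+ *
+ * On success, rc is the number of dports found.
+ */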
index b5fca97..8de240c 100644 (file)
@@ -57,24 +57,30 @@ bool is_cxl_nvdimm_bridge(struct device *dev)
 }
 EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL);
 
-__mock int match_nvdimm_bridge(struct device *dev, const void *data)
+static int match_nvdimm_bridge(struct device *dev, void *data)
 {
        return is_cxl_nvdimm_bridge(dev);
 }
 
 struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
 {
+       struct cxl_port *port = find_cxl_root(&cxl_nvd->dev);
        struct device *dev;
 
-       dev = bus_find_device(&cxl_bus_type, NULL, cxl_nvd, match_nvdimm_bridge);
+       if (!port)
+               return NULL;
+
+       dev = device_find_child(&port->dev, NULL, match_nvdimm_bridge);
+       put_device(&port->dev);
+
        if (!dev)
                return NULL;
+
        return to_cxl_nvdimm_bridge(dev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL);
 
-static struct cxl_nvdimm_bridge *
-cxl_nvdimm_bridge_alloc(struct cxl_port *port)
+static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
 {
        struct cxl_nvdimm_bridge *cxl_nvb;
        struct device *dev;
@@ -115,10 +121,10 @@ static void unregister_nvb(void *_cxl_nvb)
         * work to flush. Once the state has been changed to 'dead' then no new
         * work can be queued by user-triggered bind.
         */
-       device_lock(&cxl_nvb->dev);
+       cxl_device_lock(&cxl_nvb->dev);
        flush = cxl_nvb->state != CXL_NVB_NEW;
        cxl_nvb->state = CXL_NVB_DEAD;
-       device_unlock(&cxl_nvb->dev);
+       cxl_device_unlock(&cxl_nvb->dev);
 
        /*
         * Even though the device core will trigger device_release_driver()
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
new file mode 100644 (file)
index 0000000..2ab1ba4
--- /dev/null
@@ -0,0 +1,1568 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <cxlmem.h>
+#include <cxlpci.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core
+ *
+ * The CXL core provides a set of interfaces that can be consumed by CXL aware
+ * drivers. The interfaces allow for creation, modification, and destruction of
+ * regions, memory devices, ports, and decoders. CXL-aware drivers must
+ * register with the CXL core via these interfaces to participate in
+ * cross-device interleave coordination. The CXL core also establishes and
+ * maintains the bridge to the nvdimm subsystem.
+ *
+ * The CXL core introduces a sysfs hierarchy to control the devices
+ * instantiated by the core.
+ */
+
+static DEFINE_IDA(cxl_port_ida);
+static DEFINE_XARRAY(cxl_root_buses);
+
+static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       return sysfs_emit(buf, "%s\n", dev->type->name);
+}
+static DEVICE_ATTR_RO(devtype);
+
+static int cxl_device_id(struct device *dev)
+{
+       if (dev->type == &cxl_nvdimm_bridge_type)
+               return CXL_DEVICE_NVDIMM_BRIDGE;
+       if (dev->type == &cxl_nvdimm_type)
+               return CXL_DEVICE_NVDIMM;
+       if (is_cxl_port(dev)) {
+               if (is_cxl_root(to_cxl_port(dev)))
+                       return CXL_DEVICE_ROOT;
+               return CXL_DEVICE_PORT;
+       }
+       if (is_cxl_memdev(dev))
+               return CXL_DEVICE_MEMORY_EXPANDER;
+       return 0;
+}
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+                            char *buf)
+{
+       return sysfs_emit(buf, CXL_MODALIAS_FMT "\n", cxl_device_id(dev));
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *cxl_base_attributes[] = {
+       &dev_attr_devtype.attr,
+       &dev_attr_modalias.attr,
+       NULL,
+};
+
+struct attribute_group cxl_base_attribute_group = {
+       .attrs = cxl_base_attributes,
+};
+
+static ssize_t start_show(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+       u64 start;
+
+       if (is_root_decoder(dev))
+               start = cxld->platform_res.start;
+       else
+               start = cxld->decoder_range.start;
+
+       return sysfs_emit(buf, "%#llx\n", start);
+}
+static DEVICE_ATTR_ADMIN_RO(start);
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+       u64 size;
+
+       if (is_root_decoder(dev))
+               size = resource_size(&cxld->platform_res);
+       else
+               size = range_len(&cxld->decoder_range);
+
+       return sysfs_emit(buf, "%#llx\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+#define CXL_DECODER_FLAG_ATTR(name, flag)                            \
+static ssize_t name##_show(struct device *dev,                       \
+                          struct device_attribute *attr, char *buf) \
+{                                                                    \
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);              \
+                                                                     \
+       return sysfs_emit(buf, "%s\n",                               \
+                         (cxld->flags & (flag)) ? "1" : "0");       \
+}                                                                    \
+static DEVICE_ATTR_RO(name)
+
+CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
+CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
+CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
+CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
+CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
+
+static ssize_t target_type_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+       switch (cxld->target_type) {
+       case CXL_DECODER_ACCELERATOR:
+               return sysfs_emit(buf, "accelerator\n");
+       case CXL_DECODER_EXPANDER:
+               return sysfs_emit(buf, "expander\n");
+       }
+       return -ENXIO;
+}
+static DEVICE_ATTR_RO(target_type);
+
+static ssize_t emit_target_list(struct cxl_decoder *cxld, char *buf)
+{
+       ssize_t offset = 0;
+       int i, rc = 0;
+
+       for (i = 0; i < cxld->interleave_ways; i++) {
+               struct cxl_dport *dport = cxld->target[i];
+               struct cxl_dport *next = NULL;
+
+               if (!dport)
+                       break;
+
+               if (i + 1 < cxld->interleave_ways)
+                       next = cxld->target[i + 1];
+               rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
+                                  next ? "," : "");
+               if (rc < 0)
+                       return rc;
+               offset += rc;
+       }
+
+       return offset;
+}
+
+static ssize_t target_list_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+       ssize_t offset;
+       unsigned int seq;
+       int rc;
+
+       do {
+               seq = read_seqbegin(&cxld->target_lock);
+               rc = emit_target_list(cxld, buf);
+       } while (read_seqretry(&cxld->target_lock, seq));
+
+       if (rc < 0)
+               return rc;
+       offset = rc;
+
+       rc = sysfs_emit_at(buf, offset, "\n");
+       if (rc < 0)
+               return rc;
+
+       return offset + rc;
+}
+static DEVICE_ATTR_RO(target_list);
+
+static struct attribute *cxl_decoder_base_attrs[] = {
+       &dev_attr_start.attr,
+       &dev_attr_size.attr,
+       &dev_attr_locked.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_decoder_base_attribute_group = {
+       .attrs = cxl_decoder_base_attrs,
+};
+
+static struct attribute *cxl_decoder_root_attrs[] = {
+       &dev_attr_cap_pmem.attr,
+       &dev_attr_cap_ram.attr,
+       &dev_attr_cap_type2.attr,
+       &dev_attr_cap_type3.attr,
+       &dev_attr_target_list.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_decoder_root_attribute_group = {
+       .attrs = cxl_decoder_root_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
+       &cxl_decoder_root_attribute_group,
+       &cxl_decoder_base_attribute_group,
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+static struct attribute *cxl_decoder_switch_attrs[] = {
+       &dev_attr_target_type.attr,
+       &dev_attr_target_list.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_decoder_switch_attribute_group = {
+       .attrs = cxl_decoder_switch_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
+       &cxl_decoder_switch_attribute_group,
+       &cxl_decoder_base_attribute_group,
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+static struct attribute *cxl_decoder_endpoint_attrs[] = {
+       &dev_attr_target_type.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_decoder_endpoint_attribute_group = {
+       .attrs = cxl_decoder_endpoint_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_endpoint_attribute_groups[] = {
+       &cxl_decoder_base_attribute_group,
+       &cxl_decoder_endpoint_attribute_group,
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+static void cxl_decoder_release(struct device *dev)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+       struct cxl_port *port = to_cxl_port(dev->parent);
+
+       ida_free(&port->decoder_ida, cxld->id);
+       kfree(cxld);
+       put_device(&port->dev);
+}
+
+static const struct device_type cxl_decoder_endpoint_type = {
+       .name = "cxl_decoder_endpoint",
+       .release = cxl_decoder_release,
+       .groups = cxl_decoder_endpoint_attribute_groups,
+};
+
+static const struct device_type cxl_decoder_switch_type = {
+       .name = "cxl_decoder_switch",
+       .release = cxl_decoder_release,
+       .groups = cxl_decoder_switch_attribute_groups,
+};
+
+static const struct device_type cxl_decoder_root_type = {
+       .name = "cxl_decoder_root",
+       .release = cxl_decoder_release,
+       .groups = cxl_decoder_root_attribute_groups,
+};
+
+static bool is_endpoint_decoder(struct device *dev)
+{
+       return dev->type == &cxl_decoder_endpoint_type;
+}
+
+bool is_root_decoder(struct device *dev)
+{
+       return dev->type == &cxl_decoder_root_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL);
+
+bool is_cxl_decoder(struct device *dev)
+{
+       return dev->type && dev->type->release == cxl_decoder_release;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_decoder, CXL);
+
+struct cxl_decoder *to_cxl_decoder(struct device *dev)
+{
+       if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release,
+                         "not a cxl_decoder device\n"))
+               return NULL;
+       return container_of(dev, struct cxl_decoder, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL);
+
+static void cxl_ep_release(struct cxl_ep *ep)
+{
+       if (!ep)
+               return;
+       list_del(&ep->list);
+       put_device(ep->ep);
+       kfree(ep);
+}
+
+static void cxl_port_release(struct device *dev)
+{
+       struct cxl_port *port = to_cxl_port(dev);
+       struct cxl_ep *ep, *_e;
+
+       cxl_device_lock(dev);
+       list_for_each_entry_safe(ep, _e, &port->endpoints, list)
+               cxl_ep_release(ep);
+       cxl_device_unlock(dev);
+       ida_free(&cxl_port_ida, port->id);
+       kfree(port);
+}
+
+static const struct attribute_group *cxl_port_attribute_groups[] = {
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+static const struct device_type cxl_port_type = {
+       .name = "cxl_port",
+       .release = cxl_port_release,
+       .groups = cxl_port_attribute_groups,
+};
+
+bool is_cxl_port(struct device *dev)
+{
+       return dev->type == &cxl_port_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_port, CXL);
+
+struct cxl_port *to_cxl_port(struct device *dev)
+{
+       if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
+                         "not a cxl_port device\n"))
+               return NULL;
+       return container_of(dev, struct cxl_port, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL);
+
+static void unregister_port(void *_port)
+{
+       struct cxl_port *port = _port;
+       struct cxl_port *parent;
+       struct device *lock_dev;
+
+       if (is_cxl_root(port))
+               parent = NULL;
+       else
+               parent = to_cxl_port(port->dev.parent);
+
+       /*
+        * The CXL root port and the first level of ports are unregistered
+        * under the platform firmware device lock; all other ports are
+        * unregistered while holding their parent port lock.
+        */
+       if (!parent)
+               lock_dev = port->uport;
+       else if (is_cxl_root(parent))
+               lock_dev = parent->uport;
+       else
+               lock_dev = &parent->dev;
+
+       device_lock_assert(lock_dev);
+       port->uport = NULL;
+       device_unregister(&port->dev);
+}
+
+static void cxl_unlink_uport(void *_port)
+{
+       struct cxl_port *port = _port;
+
+       sysfs_remove_link(&port->dev.kobj, "uport");
+}
+
+static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
+{
+       int rc;
+
+       rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
+       if (rc)
+               return rc;
+       return devm_add_action_or_reset(host, cxl_unlink_uport, port);
+}
+
+static struct cxl_port *cxl_port_alloc(struct device *uport,
+                                      resource_size_t component_reg_phys,
+                                      struct cxl_port *parent_port)
+{
+       struct cxl_port *port;
+       struct device *dev;
+       int rc;
+
+       port = kzalloc(sizeof(*port), GFP_KERNEL);
+       if (!port)
+               return ERR_PTR(-ENOMEM);
+
+       rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
+       if (rc < 0)
+               goto err;
+       port->id = rc;
+
+       /*
+        * The top-level cxl_port "cxl_root" does not have a cxl_port as
+        * its parent, and it has no corresponding component registers
+        * because its decode is described by a fixed platform
+        * description.
+        */
+       dev = &port->dev;
+       if (parent_port)
+               dev->parent = &parent_port->dev;
+       else
+               dev->parent = uport;
+
+       port->uport = uport;
+       port->component_reg_phys = component_reg_phys;
+       ida_init(&port->decoder_ida);
+       INIT_LIST_HEAD(&port->dports);
+       INIT_LIST_HEAD(&port->endpoints);
+
+       device_initialize(dev);
+       device_set_pm_not_required(dev);
+       dev->bus = &cxl_bus_type;
+       dev->type = &cxl_port_type;
+
+       return port;
+
+err:
+       kfree(port);
+       return ERR_PTR(rc);
+}
+
+/**
+ * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
+ * @host: host device for devm operations
+ * @uport: "physical" device implementing this upstream port
+ * @component_reg_phys: (optional) for configurable cxl_port instances
+ * @parent_port: next hop up in the CXL memory decode hierarchy
+ */
+struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
+                                  resource_size_t component_reg_phys,
+                                  struct cxl_port *parent_port)
+{
+       struct cxl_port *port;
+       struct device *dev;
+       int rc;
+
+       port = cxl_port_alloc(uport, component_reg_phys, parent_port);
+       if (IS_ERR(port))
+               return port;
+
+       if (parent_port)
+               port->depth = parent_port->depth + 1;
+       dev = &port->dev;
+       if (is_cxl_memdev(uport))
+               rc = dev_set_name(dev, "endpoint%d", port->id);
+       else if (parent_port)
+               rc = dev_set_name(dev, "port%d", port->id);
+       else
+               rc = dev_set_name(dev, "root%d", port->id);
+       if (rc)
+               goto err;
+
+       rc = device_add(dev);
+       if (rc)
+               goto err;
+
+       rc = devm_add_action_or_reset(host, unregister_port, port);
+       if (rc)
+               return ERR_PTR(rc);
+
+       rc = devm_cxl_link_uport(host, port);
+       if (rc)
+               return ERR_PTR(rc);
+
+       return port;
+
+err:
+       put_device(dev);
+       return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
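+
+/*
+ * Illustrative (not part of this change): a platform driver creates
+ * the root object of the hierarchy by passing itself as both host and
+ * uport, with no parent and no component registers:
+ *
+ *     root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
+ *
+ * Switch ports instead pass their upstream device and parent cxl_port.
+ */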
+
+struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port)
+{
+       /* There is no pci_bus associated with a CXL platform-root port */
+       if (is_cxl_root(port))
+               return NULL;
+
+       if (dev_is_pci(port->uport)) {
+               struct pci_dev *pdev = to_pci_dev(port->uport);
+
+               return pdev->subordinate;
+       }
+
+       return xa_load(&cxl_root_buses, (unsigned long)port->uport);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_to_pci_bus, CXL);
+
+static void unregister_pci_bus(void *uport)
+{
+       xa_erase(&cxl_root_buses, (unsigned long)uport);
+}
+
+int devm_cxl_register_pci_bus(struct device *host, struct device *uport,
+                             struct pci_bus *bus)
+{
+       int rc;
+
+       if (dev_is_pci(uport))
+               return -EINVAL;
+
+       rc = xa_insert(&cxl_root_buses, (unsigned long)uport, bus, GFP_KERNEL);
+       if (rc)
+               return rc;
+       return devm_add_action_or_reset(host, unregister_pci_bus, uport);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL);
+
+static bool dev_is_cxl_root_child(struct device *dev)
+{
+       struct cxl_port *port, *parent;
+
+       if (!is_cxl_port(dev))
+               return false;
+
+       port = to_cxl_port(dev);
+       if (is_cxl_root(port))
+               return false;
+
+       parent = to_cxl_port(port->dev.parent);
+       if (is_cxl_root(parent))
+               return true;
+
+       return false;
+}
+
+/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
+static int match_root_child(struct device *dev, const void *match)
+{
+       const struct device *iter = NULL;
+       struct cxl_dport *dport;
+       struct cxl_port *port;
+
+       if (!dev_is_cxl_root_child(dev))
+               return 0;
+
+       port = to_cxl_port(dev);
+       cxl_device_lock(dev);
+       list_for_each_entry(dport, &port->dports, list) {
+               iter = match;
+               while (iter) {
+                       if (iter == dport->dport)
+                               goto out;
+                       iter = iter->parent;
+               }
+       }
+out:
+       cxl_device_unlock(dev);
+
+       return !!iter;
+}
+
+struct cxl_port *find_cxl_root(struct device *dev)
+{
+       struct device *port_dev;
+       struct cxl_port *root;
+
+       port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
+       if (!port_dev)
+               return NULL;
+
+       root = to_cxl_port(port_dev->parent);
+       get_device(&root->dev);
+       put_device(port_dev);
+       return root;
+}
+EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
+
+static struct cxl_dport *find_dport(struct cxl_port *port, int id)
+{
+       struct cxl_dport *dport;
+
+       device_lock_assert(&port->dev);
+       list_for_each_entry (dport, &port->dports, list)
+               if (dport->port_id == id)
+                       return dport;
+       return NULL;
+}
+
+static int add_dport(struct cxl_port *port, struct cxl_dport *new)
+{
+       struct cxl_dport *dup;
+
+       device_lock_assert(&port->dev);
+       dup = find_dport(port, new->port_id);
+       if (dup)
+               dev_err(&port->dev,
+                       "unable to add dport%d-%s non-unique port id (%s)\n",
+                       new->port_id, dev_name(new->dport),
+                       dev_name(dup->dport));
+       else
+               list_add_tail(&new->list, &port->dports);
+
+       return dup ? -EEXIST : 0;
+}
+
+/*
+ * Since root-level CXL dports cannot be enumerated by PCI, they are not
+ * enumerated by the common port driver that acquires the port lock over
+ * dport add/remove. Instead, root dports are manually added by a
+ * platform driver and cond_cxl_root_lock() is used to take the missing
+ * port lock in that case.
+ */
+static void cond_cxl_root_lock(struct cxl_port *port)
+{
+       if (is_cxl_root(port))
+               cxl_device_lock(&port->dev);
+}
+
+static void cond_cxl_root_unlock(struct cxl_port *port)
+{
+       if (is_cxl_root(port))
+               cxl_device_unlock(&port->dev);
+}
+
+static void cxl_dport_remove(void *data)
+{
+       struct cxl_dport *dport = data;
+       struct cxl_port *port = dport->port;
+
+       put_device(dport->dport);
+       cond_cxl_root_lock(port);
+       list_del(&dport->list);
+       cond_cxl_root_unlock(port);
+}
+
+static void cxl_dport_unlink(void *data)
+{
+       struct cxl_dport *dport = data;
+       struct cxl_port *port = dport->port;
+       char link_name[CXL_TARGET_STRLEN];
+
+       sprintf(link_name, "dport%d", dport->port_id);
+       sysfs_remove_link(&port->dev.kobj, link_name);
+}
+
+/**
+ * devm_cxl_add_dport - append downstream port data to a cxl_port
+ * @port: the cxl_port that references this dport
+ * @dport_dev: firmware or PCI device representing the dport
+ * @port_id: identifier for this dport in a decoder's target list
+ * @component_reg_phys: optional location of CXL component registers
+ *
+ * Note that dports are appended to the devm release actions of either
+ * the port's host (for root ports) or the port itself (for switch
+ * ports).
+ */
+struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
+                                    struct device *dport_dev, int port_id,
+                                    resource_size_t component_reg_phys)
+{
+       char link_name[CXL_TARGET_STRLEN];
+       struct cxl_dport *dport;
+       struct device *host;
+       int rc;
+
+       if (is_cxl_root(port))
+               host = port->uport;
+       else
+               host = &port->dev;
+
+       if (!host->driver) {
+               dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n",
+                             dev_name(dport_dev));
+               return ERR_PTR(-ENXIO);
+       }
+
+       if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
+           CXL_TARGET_STRLEN)
+               return ERR_PTR(-EINVAL);
+
+       dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL);
+       if (!dport)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&dport->list);
+       dport->dport = dport_dev;
+       dport->port_id = port_id;
+       dport->component_reg_phys = component_reg_phys;
+       dport->port = port;
+
+       cond_cxl_root_lock(port);
+       rc = add_dport(port, dport);
+       cond_cxl_root_unlock(port);
+       if (rc)
+               return ERR_PTR(rc);
+
+       get_device(dport_dev);
+       rc = devm_add_action_or_reset(host, cxl_dport_remove, dport);
+       if (rc)
+               return ERR_PTR(rc);
+
+       rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
+       if (rc)
+               return ERR_PTR(rc);
+
+       rc = devm_add_action_or_reset(host, cxl_dport_unlink, dport);
+       if (rc)
+               return ERR_PTR(rc);
+
+       return dport;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL);
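+
+/*
+ * Illustrative (not part of this change): a platform driver registers
+ * a host bridge as a root-level dport, where 'uid' and 'chbcr' stand
+ * in for the firmware-assigned port id and component register base:
+ *
+ *     dport = devm_cxl_add_dport(root_port, bridge_dev, uid, chbcr);
+ *     if (IS_ERR(dport))
+ *             return PTR_ERR(dport);
+ */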
+
+static struct cxl_ep *find_ep(struct cxl_port *port, struct device *ep_dev)
+{
+       struct cxl_ep *ep;
+
+       device_lock_assert(&port->dev);
+       list_for_each_entry(ep, &port->endpoints, list)
+               if (ep->ep == ep_dev)
+                       return ep;
+       return NULL;
+}
+
+static int add_ep(struct cxl_port *port, struct cxl_ep *new)
+{
+       struct cxl_ep *dup;
+
+       cxl_device_lock(&port->dev);
+       if (port->dead) {
+               cxl_device_unlock(&port->dev);
+               return -ENXIO;
+       }
+       dup = find_ep(port, new->ep);
+       if (!dup)
+               list_add_tail(&new->list, &port->endpoints);
+       cxl_device_unlock(&port->dev);
+
+       return dup ? -EEXIST : 0;
+}
+
+/**
+ * cxl_add_ep - register an endpoint's interest in a port
+ * @port: a port in the endpoint's topology ancestry
+ * @ep_dev: device representing the endpoint
+ *
+ * Intermediate CXL ports are scanned based on the arrival of endpoints.
+ * When those endpoints depart, the port can be destroyed once all
+ * endpoints that care about that port have been removed.
+ */
+static int cxl_add_ep(struct cxl_port *port, struct device *ep_dev)
+{
+       struct cxl_ep *ep;
+       int rc;
+
+       ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+       if (!ep)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&ep->list);
+       ep->ep = get_device(ep_dev);
+
+       rc = add_ep(port, ep);
+       if (rc)
+               cxl_ep_release(ep);
+       return rc;
+}
+
+struct cxl_find_port_ctx {
+       const struct device *dport_dev;
+       const struct cxl_port *parent_port;
+};
+
+static int match_port_by_dport(struct device *dev, const void *data)
+{
+       const struct cxl_find_port_ctx *ctx = data;
+       struct cxl_port *port;
+
+       if (!is_cxl_port(dev))
+               return 0;
+       if (ctx->parent_port && dev->parent != &ctx->parent_port->dev)
+               return 0;
+
+       port = to_cxl_port(dev);
+       return cxl_find_dport_by_dev(port, ctx->dport_dev) != NULL;
+}
+
+static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
+{
+       struct device *dev;
+
+       if (!ctx->dport_dev)
+               return NULL;
+
+       dev = bus_find_device(&cxl_bus_type, NULL, ctx, match_port_by_dport);
+       if (dev)
+               return to_cxl_port(dev);
+       return NULL;
+}
+
+static struct cxl_port *find_cxl_port(struct device *dport_dev)
+{
+       struct cxl_find_port_ctx ctx = {
+               .dport_dev = dport_dev,
+       };
+
+       return __find_cxl_port(&ctx);
+}
+
+static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port,
+                                        struct device *dport_dev)
+{
+       struct cxl_find_port_ctx ctx = {
+               .dport_dev = dport_dev,
+               .parent_port = parent_port,
+       };
+
+       return __find_cxl_port(&ctx);
+}
+
+/*
+ * All users of grandparent() use it to walk a PCIe-like switch port
+ * hierarchy. A PCIe switch comprises a bridge device representing the
+ * upstream switch port and N bridges representing downstream switch ports.
+ * When bridges stack, the grandparent of a downstream switch port is another
+ * downstream switch port in the immediate ancestor switch.
+ */
+static struct device *grandparent(struct device *dev)
+{
+       if (dev && dev->parent)
+               return dev->parent->parent;
+       return NULL;
+}
+
+static void delete_endpoint(void *data)
+{
+       struct cxl_memdev *cxlmd = data;
+       struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev);
+       struct cxl_port *parent_port;
+       struct device *parent;
+
+       parent_port = cxl_mem_find_port(cxlmd);
+       if (!parent_port)
+               goto out;
+       parent = &parent_port->dev;
+
+       cxl_device_lock(parent);
+       if (parent->driver && endpoint->uport) {
+               devm_release_action(parent, cxl_unlink_uport, endpoint);
+               devm_release_action(parent, unregister_port, endpoint);
+       }
+       cxl_device_unlock(parent);
+       put_device(parent);
+out:
+       put_device(&endpoint->dev);
+}
+
+int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
+{
+       struct device *dev = &cxlmd->dev;
+
+       get_device(&endpoint->dev);
+       dev_set_drvdata(dev, endpoint);
+       return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
+
+/*
+ * The natural end of life of a non-root 'cxl_port' is when its parent port goes
+ * through a ->remove() event ("top-down" unregistration). The unnatural trigger
+ * for a port to be unregistered is when all memdevs beneath that port have gone
+ * through ->remove(). This "bottom-up" removal selectively removes individual
+ * child ports manually. This depends on devm_cxl_add_port() not changing its
+ * devm action registration order.
+ */
+static void delete_switch_port(struct cxl_port *port, struct list_head *dports)
+{
+       struct cxl_dport *dport, *_d;
+
+       list_for_each_entry_safe(dport, _d, dports, list) {
+               devm_release_action(&port->dev, cxl_dport_unlink, dport);
+               devm_release_action(&port->dev, cxl_dport_remove, dport);
+               devm_kfree(&port->dev, dport);
+       }
+       devm_release_action(port->dev.parent, cxl_unlink_uport, port);
+       devm_release_action(port->dev.parent, unregister_port, port);
+}
+
+static void cxl_detach_ep(void *data)
+{
+       struct cxl_memdev *cxlmd = data;
+       struct device *iter;
+
+       for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) {
+               struct device *dport_dev = grandparent(iter);
+               struct cxl_port *port, *parent_port;
+               LIST_HEAD(reap_dports);
+               struct cxl_ep *ep;
+
+               if (!dport_dev)
+                       break;
+
+               port = find_cxl_port(dport_dev);
+               if (!port)
+                       continue;
+
+               if (is_cxl_root(port)) {
+                       put_device(&port->dev);
+                       continue;
+               }
+
+               parent_port = to_cxl_port(port->dev.parent);
+               cxl_device_lock(&parent_port->dev);
+               if (!parent_port->dev.driver) {
+                       /*
+                        * The bottom-up race to delete the port lost to a
+                        * top-down port disable; give up here, because the
+                        * parent_port ->remove() will have cleaned up all
+                        * descendants.
+                        */
+                       cxl_device_unlock(&parent_port->dev);
+                       put_device(&port->dev);
+                       continue;
+               }
+
+               cxl_device_lock(&port->dev);
+               ep = find_ep(port, &cxlmd->dev);
+               dev_dbg(&cxlmd->dev, "disconnect %s from %s\n",
+                       ep ? dev_name(ep->ep) : "", dev_name(&port->dev));
+               cxl_ep_release(ep);
+               if (ep && !port->dead && list_empty(&port->endpoints) &&
+                   !is_cxl_root(parent_port)) {
+                       /*
+                        * This was the last ep attached to a dynamically
+                        * enumerated port. Block new cxl_add_ep() and garbage
+                        * collect the port.
+                        */
+                       port->dead = true;
+                       list_splice_init(&port->dports, &reap_dports);
+               }
+               cxl_device_unlock(&port->dev);
+
+               if (!list_empty(&reap_dports)) {
+                       dev_dbg(&cxlmd->dev, "delete %s\n",
+                               dev_name(&port->dev));
+                       delete_switch_port(port, &reap_dports);
+               }
+               put_device(&port->dev);
+               cxl_device_unlock(&parent_port->dev);
+       }
+}
+
+static resource_size_t find_component_registers(struct device *dev)
+{
+       struct cxl_register_map map;
+       struct pci_dev *pdev;
+
+       /*
+        * Theoretically, CXL component registers can be hosted on a
+        * non-PCI device; in practice, only cxl_test hits this case.
+        */
+       if (!dev_is_pci(dev))
+               return CXL_RESOURCE_NONE;
+
+       pdev = to_pci_dev(dev);
+
+       cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+       return cxl_regmap_to_base(pdev, &map);
+}
+
+static int add_port_attach_ep(struct cxl_memdev *cxlmd,
+                             struct device *uport_dev,
+                             struct device *dport_dev)
+{
+       struct device *dparent = grandparent(dport_dev);
+       struct cxl_port *port, *parent_port = NULL;
+       resource_size_t component_reg_phys;
+       int rc;
+
+       if (!dparent) {
+               /*
+                * The iteration reached the topology root without finding the
+                * CXL-root 'cxl_port' on a previous iteration; fail for now to
+                * be re-probed after the platform driver attaches.
+                */
+               dev_dbg(&cxlmd->dev, "%s is a root dport\n",
+                       dev_name(dport_dev));
+               return -ENXIO;
+       }
+
+       parent_port = find_cxl_port(dparent);
+       if (!parent_port) {
+               /* iterate to create this parent_port */
+               return -EAGAIN;
+       }
+
+       cxl_device_lock(&parent_port->dev);
+       if (!parent_port->dev.driver) {
+               dev_warn(&cxlmd->dev,
+                        "port %s:%s disabled, failed to enumerate CXL.mem\n",
+                        dev_name(&parent_port->dev), dev_name(uport_dev));
+               port = ERR_PTR(-ENXIO);
+               goto out;
+       }
+
+       port = find_cxl_port_at(parent_port, dport_dev);
+       if (!port) {
+               component_reg_phys = find_component_registers(uport_dev);
+               port = devm_cxl_add_port(&parent_port->dev, uport_dev,
+                                        component_reg_phys, parent_port);
+               if (!IS_ERR(port))
+                       get_device(&port->dev);
+       }
+out:
+       cxl_device_unlock(&parent_port->dev);
+
+       if (IS_ERR(port))
+               rc = PTR_ERR(port);
+       else {
+               dev_dbg(&cxlmd->dev, "add to new port %s:%s\n",
+                       dev_name(&port->dev), dev_name(port->uport));
+               rc = cxl_add_ep(port, &cxlmd->dev);
+               if (rc == -EEXIST) {
+                       /*
+                        * "can't" happen, but this error code means
+                        * something to the caller, so translate it.
+                        */
+                       rc = -ENXIO;
+               }
+               put_device(&port->dev);
+       }
+
+       put_device(&parent_port->dev);
+       return rc;
+}
+
+int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
+{
+       struct device *dev = &cxlmd->dev;
+       struct device *iter;
+       int rc;
+
+       rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd);
+       if (rc)
+               return rc;
+
+       /*
+        * Scan for and add all cxl_ports in this device's ancestry.
+        * Repeat until no more ports are added. Abort if a port add
+        * attempt fails.
+        */
+retry:
+       for (iter = dev; iter; iter = grandparent(iter)) {
+               struct device *dport_dev = grandparent(iter);
+               struct device *uport_dev;
+               struct cxl_port *port;
+
+               if (!dport_dev)
+                       return 0;
+
+               uport_dev = dport_dev->parent;
+               if (!uport_dev) {
+                       dev_warn(dev, "at %s no parent for dport: %s\n",
+                                dev_name(iter), dev_name(dport_dev));
+                       return -ENXIO;
+               }
+
+               dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n",
+                       dev_name(iter), dev_name(dport_dev),
+                       dev_name(uport_dev));
+               port = find_cxl_port(dport_dev);
+               if (port) {
+                       dev_dbg(&cxlmd->dev,
+                               "found already registered port %s:%s\n",
+                               dev_name(&port->dev), dev_name(port->uport));
+                       rc = cxl_add_ep(port, &cxlmd->dev);
+
+                       /*
+                        * If the endpoint already exists in the port's list,
+                        * that's ok, it was added on a previous pass.
+                        * Otherwise, retry in add_port_attach_ep() after taking
+                        * the parent_port lock, as the current port may be in
+                        * the process of being reaped.
+                        */
+                       if (rc && rc != -EEXIST) {
+                               put_device(&port->dev);
+                               return rc;
+                       }
+
+                       /* Any more ports to add between this one and the root? */
+                       if (!dev_is_cxl_root_child(&port->dev)) {
+                               put_device(&port->dev);
+                               continue;
+                       }
+
+                       put_device(&port->dev);
+                       return 0;
+               }
+
+               rc = add_port_attach_ep(cxlmd, uport_dev, dport_dev);
+               /* port missing, try to add parent */
+               if (rc == -EAGAIN)
+                       continue;
+               /* failed to add ep or port */
+               if (rc)
+                       return rc;
+               /* port added, new descendants possible, start over */
+               goto retry;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL);
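+
+/*
+ * Illustrative (not part of this change): the cxl_mem driver calls
+ * this from probe so that every switch port between the endpoint and
+ * the CXL root is registered before the endpoint port is added:
+ *
+ *     rc = devm_cxl_enumerate_ports(cxlmd);
+ *     if (rc)
+ *             return rc;
+ */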
+
+struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd)
+{
+       return find_cxl_port(grandparent(&cxlmd->dev));
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL);
+
+struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port,
+                                       const struct device *dev)
+{
+       struct cxl_dport *dport;
+
+       cxl_device_lock(&port->dev);
+       list_for_each_entry(dport, &port->dports, list)
+               if (dport->dport == dev) {
+                       cxl_device_unlock(&port->dev);
+                       return dport;
+               }
+
+       cxl_device_unlock(&port->dev);
+       return NULL;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL);
+
+static int decoder_populate_targets(struct cxl_decoder *cxld,
+                                   struct cxl_port *port, int *target_map)
+{
+       int i, rc = 0;
+
+       if (!target_map)
+               return 0;
+
+       device_lock_assert(&port->dev);
+
+       if (list_empty(&port->dports))
+               return -EINVAL;
+
+       write_seqlock(&cxld->target_lock);
+       for (i = 0; i < cxld->nr_targets; i++) {
+               struct cxl_dport *dport = find_dport(port, target_map[i]);
+
+               if (!dport) {
+                       rc = -ENXIO;
+                       break;
+               }
+               cxld->target[i] = dport;
+       }
+       write_sequnlock(&cxld->target_lock);
+
+       return rc;
+}
+
+/**
+ * cxl_decoder_alloc - Allocate a new CXL decoder
+ * @port: owning port of this decoder
+ * @nr_targets: downstream targets accessible by this decoder. All upstream
+ *             ports and root ports must have at least 1 target. Endpoint
+ *             devices have 0 targets, so callers registering an endpoint
+ *             decoder should specify 0.
+ *
+ * A port should contain one or more decoders. Each of those decoders enables
+ * some address space for CXL.mem utilization. A decoder is expected to be
+ * configured by the caller before registering.
+ *
+ * Return: A new cxl decoder to be registered by cxl_decoder_add(). The decoder
+ *        is initialized to be a "passthrough" decoder.
+ */
+static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port,
+                                            unsigned int nr_targets)
+{
+       struct cxl_decoder *cxld;
+       struct device *dev;
+       int rc = 0;
+
+       if (nr_targets > CXL_DECODER_MAX_INTERLEAVE)
+               return ERR_PTR(-EINVAL);
+
+       cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
+       if (!cxld)
+               return ERR_PTR(-ENOMEM);
+
+       rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
+       if (rc < 0)
+               goto err;
+
+       /* need parent to stick around to release the id */
+       get_device(&port->dev);
+       cxld->id = rc;
+
+       cxld->nr_targets = nr_targets;
+       seqlock_init(&cxld->target_lock);
+       dev = &cxld->dev;
+       device_initialize(dev);
+       device_set_pm_not_required(dev);
+       dev->parent = &port->dev;
+       dev->bus = &cxl_bus_type;
+       if (is_cxl_root(port))
+               cxld->dev.type = &cxl_decoder_root_type;
+       else if (is_cxl_endpoint(port))
+               cxld->dev.type = &cxl_decoder_endpoint_type;
+       else
+               cxld->dev.type = &cxl_decoder_switch_type;
+
+       /* Pre initialize an "empty" decoder */
+       cxld->interleave_ways = 1;
+       cxld->interleave_granularity = PAGE_SIZE;
+       cxld->target_type = CXL_DECODER_EXPANDER;
+       cxld->platform_res = (struct resource)DEFINE_RES_MEM(0, 0);
+
+       return cxld;
+err:
+       kfree(cxld);
+       return ERR_PTR(rc);
+}
+
+/**
+ * cxl_root_decoder_alloc - Allocate a root level decoder
+ * @port: owning CXL root of this decoder
+ * @nr_targets: static number of downstream targets
+ *
+ * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
+ * 'CXL root' decoder is one that decodes from a top-level / static platform
+ * firmware description of CXL resources into a CXL standard decode
+ * topology.
+ */
+struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
+                                          unsigned int nr_targets)
+{
+       if (!is_cxl_root(port))
+               return ERR_PTR(-EINVAL);
+
+       return cxl_decoder_alloc(port, nr_targets);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL);
+
+/**
+ * cxl_switch_decoder_alloc - Allocate a switch level decoder
+ * @port: owning CXL switch port of this decoder
+ * @nr_targets: max number of dynamically addressable downstream targets
+ *
+ * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
+ * 'switch' decoder is any decoder that can be enumerated by PCIe
+ * topology and the HDM Decoder Capability. This includes the decoders
+ * that sit between Switch Upstream Ports / Switch Downstream Ports and
+ * Host Bridges / Root Ports.
+ */
+struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
+                                            unsigned int nr_targets)
+{
+       if (is_cxl_root(port) || is_cxl_endpoint(port))
+               return ERR_PTR(-EINVAL);
+
+       return cxl_decoder_alloc(port, nr_targets);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL);
+
+/**
+ * cxl_endpoint_decoder_alloc - Allocate an endpoint decoder
+ * @port: owning port of this decoder
+ *
+ * Return: A new cxl decoder to be registered by cxl_decoder_add()
+ */
+struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port)
+{
+       if (!is_cxl_endpoint(port))
+               return ERR_PTR(-EINVAL);
+
+       return cxl_decoder_alloc(port, 0);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, CXL);
+
+/**
+ * cxl_decoder_add_locked - Add a decoder with targets
+ * @cxld: The cxl decoder allocated by cxl_decoder_alloc()
+ * @target_map: A list of downstream ports that this decoder can direct memory
+ *              traffic to. These numbers should correspond to the port numbers
+ *              in the PCIe Link Capabilities structure.
+ *
+ * Certain types of decoders may not have any targets. The main example of this
+ * is an endpoint device. A more awkward example is a host bridge whose root
+ * ports get hot-added (technically possible, though unlikely).
+ *
+ * This is the locked variant of cxl_decoder_add().
+ *
+ * Context: Process context. Expects the device lock of the port that owns the
+ *         @cxld to be held.
+ *
+ * Return: Negative error code if the decoder wasn't properly configured; else
+ *        returns 0.
+ */
+int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map)
+{
+       struct cxl_port *port;
+       struct device *dev;
+       int rc;
+
+       if (WARN_ON_ONCE(!cxld))
+               return -EINVAL;
+
+       if (WARN_ON_ONCE(IS_ERR(cxld)))
+               return PTR_ERR(cxld);
+
+       if (cxld->interleave_ways < 1)
+               return -EINVAL;
+
+       dev = &cxld->dev;
+
+       port = to_cxl_port(cxld->dev.parent);
+       if (!is_endpoint_decoder(dev)) {
+               rc = decoder_populate_targets(cxld, port, target_map);
+               if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) {
+                       dev_err(&port->dev,
+                               "Failed to populate active decoder targets\n");
+                       return rc;
+               }
+       }
+
+       rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
+       if (rc)
+               return rc;
+
+       /*
+        * Platform decoder resources should show up with a reasonable name. All
+        * other resources are just sub-ranges within the main decoder resource.
+        */
+       if (is_root_decoder(dev))
+               cxld->platform_res.name = dev_name(dev);
+
+       return device_add(dev);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL);
+
+/**
+ * cxl_decoder_add - Add a decoder with targets
+ * @cxld: The cxl decoder allocated by cxl_decoder_alloc()
+ * @target_map: A list of downstream ports that this decoder can direct memory
+ *              traffic to. These numbers should correspond to the port numbers
+ *              in the PCIe Link Capabilities structure.
+ *
+ * This is the unlocked variant of cxl_decoder_add_locked().
+ * See cxl_decoder_add_locked().
+ *
+ * Context: Process context. Takes and releases the device lock of the port that
+ *         owns the @cxld.
+ */
+int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
+{
+       struct cxl_port *port;
+       int rc;
+
+       if (WARN_ON_ONCE(!cxld))
+               return -EINVAL;
+
+       if (WARN_ON_ONCE(IS_ERR(cxld)))
+               return PTR_ERR(cxld);
+
+       port = to_cxl_port(cxld->dev.parent);
+
+       cxl_device_lock(&port->dev);
+       rc = cxl_decoder_add_locked(cxld, target_map);
+       cxl_device_unlock(&port->dev);
+
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
+
+static void cxld_unregister(void *dev)
+{
+       device_unregister(dev);
+}
+
+int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
+{
+       return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL);
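+
+/*
+ * Illustrative sketch (not part of this change) of the decoder
+ * lifecycle: allocate, configure from hardware, register, then arm
+ * automatic unregistration against a devm host:
+ */
+#if 0
+       cxld = cxl_switch_decoder_alloc(port, nr_targets);
+       if (IS_ERR(cxld))
+               return PTR_ERR(cxld);
+       /* ...fill in interleave settings and target_map... */
+       rc = cxl_decoder_add(cxld, target_map);
+       if (rc)
+               put_device(&cxld->dev);
+       else
+               rc = cxl_decoder_autoremove(host, cxld);
+#endif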
+
+/**
+ * __cxl_driver_register - register a driver for the cxl bus
+ * @cxl_drv: cxl driver structure to attach
+ * @owner: owning module/driver
+ * @modname: KBUILD_MODNAME for parent driver
+ */
+int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
+                         const char *modname)
+{
+       if (!cxl_drv->probe) {
+               pr_debug("%s ->probe() must be specified\n", modname);
+               return -EINVAL;
+       }
+
+       if (!cxl_drv->name) {
+               pr_debug("%s ->name must be specified\n", modname);
+               return -EINVAL;
+       }
+
+       if (!cxl_drv->id) {
+               pr_debug("%s ->id must be specified\n", modname);
+               return -EINVAL;
+       }
+
+       cxl_drv->drv.bus = &cxl_bus_type;
+       cxl_drv->drv.owner = owner;
+       cxl_drv->drv.mod_name = modname;
+       cxl_drv->drv.name = cxl_drv->name;
+
+       return driver_register(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL);
+
+void cxl_driver_unregister(struct cxl_driver *cxl_drv)
+{
+       driver_unregister(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL);
+
+static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+       return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
+                             cxl_device_id(dev));
+}
+
+static int cxl_bus_match(struct device *dev, struct device_driver *drv)
+{
+       return cxl_device_id(dev) == to_cxl_drv(drv)->id;
+}
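
For example, given CXL_DEVICE_MEMORY_EXPANDER (5) below, a registered memdev
endpoint emits "MODALIAS=cxl:t5"; userspace module loading resolves that
against the cxl_mem driver's MODULE_ALIAS_CXL() alias ("cxl:t5*"), and
cxl_bus_match() then binds the device to the driver whose ->id is 5.
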
+
+static int cxl_bus_probe(struct device *dev)
+{
+       int rc;
+
+       /*
+        * Take the CXL nested lock since the driver core only holds
+        * @dev->mutex and not @dev->lockdep_mutex.
+        */
+       cxl_nested_lock(dev);
+       rc = to_cxl_drv(dev->driver)->probe(dev);
+       cxl_nested_unlock(dev);
+
+       dev_dbg(dev, "probe: %d\n", rc);
+       return rc;
+}
+
+static void cxl_bus_remove(struct device *dev)
+{
+       struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
+
+       cxl_nested_lock(dev);
+       if (cxl_drv->remove)
+               cxl_drv->remove(dev);
+       cxl_nested_unlock(dev);
+}
+
+static struct workqueue_struct *cxl_bus_wq;
+
+int cxl_bus_rescan(void)
+{
+       return bus_rescan_devices(&cxl_bus_type);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);
+
+bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
+{
+       return queue_work(cxl_bus_wq, &cxlmd->detach_work);
+}
+EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
+
+/* for user tooling to ensure port disable work has completed */
+static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count)
+{
+       if (sysfs_streq(buf, "1")) {
+               flush_workqueue(cxl_bus_wq);
+               return count;
+       }
+
+       return -EINVAL;
+}
+
+static BUS_ATTR_WO(flush);
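
The attribute surfaces as /sys/bus/cxl/flush, and a write of "1" blocks until
the ordered workqueue drains. A sketch of how user tooling might synchronize
with port disable work (illustrative only, not part of this patch):

    #include <fcntl.h>
    #include <unistd.h>

    /* block until pending CXL port teardown work completes */
    static void cxl_flush(void)
    {
            int fd = open("/sys/bus/cxl/flush", O_WRONLY);

            if (fd >= 0) {
                    write(fd, "1", 1);  /* returns once cxl_bus_wq drains */
                    close(fd);
            }
    }
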
+
+static struct attribute *cxl_bus_attributes[] = {
+       &bus_attr_flush.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_bus_attribute_group = {
+       .attrs = cxl_bus_attributes,
+};
+
+static const struct attribute_group *cxl_bus_attribute_groups[] = {
+       &cxl_bus_attribute_group,
+       NULL,
+};
+
+struct bus_type cxl_bus_type = {
+       .name = "cxl",
+       .uevent = cxl_bus_uevent,
+       .match = cxl_bus_match,
+       .probe = cxl_bus_probe,
+       .remove = cxl_bus_remove,
+       .bus_groups = cxl_bus_attribute_groups,
+};
+EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);
+
+static __init int cxl_core_init(void)
+{
+       int rc;
+
+       cxl_mbox_init();
+
+       rc = cxl_memdev_init();
+       if (rc)
+               return rc;
+
+       cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0);
+       if (!cxl_bus_wq) {
+               rc = -ENOMEM;
+               goto err_wq;
+       }
+
+       rc = bus_register(&cxl_bus_type);
+       if (rc)
+               goto err_bus;
+
+       return 0;
+
+err_bus:
+       destroy_workqueue(cxl_bus_wq);
+err_wq:
+       cxl_memdev_exit();
+       cxl_mbox_exit();
+       return rc;
+}
+
+static void cxl_core_exit(void)
+{
+       bus_unregister(&cxl_bus_type);
+       destroy_workqueue(cxl_bus_wq);
+       cxl_memdev_exit();
+       cxl_mbox_exit();
+}
+
+module_init(cxl_core_init);
+module_exit(cxl_core_exit);
+MODULE_LICENSE("GPL v2");
index e37e23b..39a129c 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <cxlmem.h>
+#include <cxlpci.h>
 
 /**
  * DOC: cxl registers
@@ -35,7 +36,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
                              struct cxl_component_reg_map *map)
 {
        int cap, cap_count;
-       u64 cap_array;
+       u32 cap_array;
 
        *map = (struct cxl_component_reg_map) { 0 };
 
@@ -45,11 +46,11 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
         */
        base += CXL_CM_OFFSET;
 
-       cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET);
+       cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET);
 
        if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) {
                dev_err(dev,
-                       "Couldn't locate the CXL.cache and CXL.mem capability array header./n");
+                       "Couldn't locate the CXL.cache and CXL.mem capability array header.\n");
                return;
        }
 
@@ -158,9 +159,8 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
 }
 EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL);
 
-static void __iomem *devm_cxl_iomap_block(struct device *dev,
-                                         resource_size_t addr,
-                                         resource_size_t length)
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+                                  resource_size_t length)
 {
        void __iomem *ret_val;
        struct resource *res;
@@ -247,3 +247,58 @@ int cxl_map_device_regs(struct pci_dev *pdev,
        return 0;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
+
+static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
+                               struct cxl_register_map *map)
+{
+       map->block_offset = ((u64)reg_hi << 32) |
+                           (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
+       map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
+       map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+}
+
+/**
+ * cxl_find_regblock() - Locate register blocks by type
+ * @pdev: The CXL PCI device to enumerate.
+ * @type: Register Block Indicator id
+ * @map: Enumeration output, clobbered on error
+ *
+ * Return: 0 if register block enumerated, negative error code otherwise
+ *
+ * A CXL DVSEC may point to one or more register blocks, search for them
+ * by @type.
+ */
+int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
+                     struct cxl_register_map *map)
+{
+       u32 regloc_size, regblocks;
+       int regloc, i;
+
+       map->block_offset = U64_MAX;
+       regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
+                                          CXL_DVSEC_REG_LOCATOR);
+       if (!regloc)
+               return -ENXIO;
+
+       pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
+       regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
+
+       regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET;
+       regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8;
+
+       for (i = 0; i < regblocks; i++, regloc += 8) {
+               u32 reg_lo, reg_hi;
+
+               pci_read_config_dword(pdev, regloc, &reg_lo);
+               pci_read_config_dword(pdev, regloc + 4, &reg_hi);
+
+               cxl_decode_regblock(reg_lo, reg_hi, map);
+
+               if (map->reg_type == type)
+                       return 0;
+       }
+
+       map->block_offset = U64_MAX;
+       return -ENODEV;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
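
A typical lookup sketch (hypothetical helper name): pair cxl_find_regblock()
with the cxl_regmap_to_base() helper from cxlpci.h, added later in this
series, to translate the located block into a physical address:

    static resource_size_t find_component_regs(struct pci_dev *pdev)
    {
            struct cxl_register_map map;

            /* no component register block advertised? */
            if (cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map))
                    return CXL_RESOURCE_NONE;

            return cxl_regmap_to_base(pdev, &map);
    }
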
index a5a0be3..990b667 100644 (file)
@@ -17,6 +17,9 @@
  * (port-driver, region-driver, nvdimm object-drivers... etc).
  */
 
+/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */
+#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K
+
 /* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers */
 #define CXL_CM_OFFSET 0x1000
 #define CXL_CM_CAP_HDR_OFFSET 0x0
 #define CXL_HDM_DECODER_CAP_OFFSET 0x0
 #define   CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0)
 #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
-#define CXL_HDM_DECODER0_BASE_LOW_OFFSET 0x10
-#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET 0x14
-#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET 0x18
-#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET 0x1c
-#define CXL_HDM_DECODER0_CTRL_OFFSET 0x20
+#define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
+#define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define CXL_HDM_DECODER_CTRL_OFFSET 0x4
+#define   CXL_HDM_DECODER_ENABLE BIT(1)
+#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
+#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14)
+#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18)
+#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c)
+#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20)
+#define   CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0)
+#define   CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4)
+#define   CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
+#define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
+#define   CXL_HDM_DECODER0_CTRL_TYPE BIT(12)
+#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24)
+#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28)
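
Each decoder instance occupies a 0x20-byte stride in the HDM capability,
which is what the (i)-parameterized offsets above encode. A sketch of reading
back decoder @i's programmed range from a mapped HDM block (hypothetical
helper; masking of the reserved low bits is omitted):

    static void read_hdm_decoder_range(void __iomem *hdm, int i,
                                       u64 *base, u64 *size)
    {
            *base = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i));
            *base = (*base << 32) |
                    readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i));
            *size = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i));
            *size = (*size << 32) |
                    readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i));
    }
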
 
 static inline int cxl_hdm_decoder_count(u32 cap_hdr)
 {
@@ -145,6 +160,12 @@ int cxl_map_device_regs(struct pci_dev *pdev,
                        struct cxl_device_regs *regs,
                        struct cxl_register_map *map);
 
+enum cxl_regloc_type;
+int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
+                     struct cxl_register_map *map);
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+                                  resource_size_t length);
+
 #define CXL_RESOURCE_NONE ((resource_size_t) -1)
 #define CXL_TARGET_STRLEN 20
 
@@ -158,7 +179,8 @@ int cxl_map_device_regs(struct pci_dev *pdev,
 #define CXL_DECODER_F_TYPE2 BIT(2)
 #define CXL_DECODER_F_TYPE3 BIT(3)
 #define CXL_DECODER_F_LOCK  BIT(4)
-#define CXL_DECODER_F_MASK  GENMASK(4, 0)
+#define CXL_DECODER_F_ENABLE    BIT(5)
+#define CXL_DECODER_F_MASK  GENMASK(5, 0)
 
 enum cxl_decoder_type {
        CXL_DECODER_ACCELERATOR = 2,
@@ -175,22 +197,28 @@ enum cxl_decoder_type {
  * struct cxl_decoder - CXL address range decode configuration
  * @dev: this decoder's device
  * @id: kernel device name id
- * @range: address range considered by this decoder
+ * @platform_res: address space resources considered by root decoder
+ * @decoder_range: address space resources considered by midlevel decoder
  * @interleave_ways: number of cxl_dports in this decode
  * @interleave_granularity: data stride per dport
  * @target_type: accelerator vs expander (type2 vs type3) selector
  * @flags: memory type capabilities and locking
+ * @target_lock: coordinate coherent reads of the target list
  * @nr_targets: number of elements in @target
  * @target: active ordered target list in current decoder configuration
  */
 struct cxl_decoder {
        struct device dev;
        int id;
-       struct range range;
+       union {
+               struct resource platform_res;
+               struct range decoder_range;
+       };
        int interleave_ways;
        int interleave_granularity;
        enum cxl_decoder_type target_type;
        unsigned long flags;
+       seqlock_t target_lock;
        int nr_targets;
        struct cxl_dport *target[];
 };
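
The new union is discriminated by the decoder's position in the topology; a
hypothetical accessor sketch:

    /* sketch: root decoders carry a struct resource, all others a range */
    static struct range cxld_get_range(struct cxl_decoder *cxld)
    {
            if (is_root_decoder(&cxld->dev))
                    return (struct range) {
                            .start = cxld->platform_res.start,
                            .end = cxld->platform_res.end,
                    };

            return cxld->decoder_range;
    }
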
@@ -226,14 +254,6 @@ struct cxl_nvdimm {
        struct nvdimm *nvdimm;
 };
 
-struct cxl_walk_context {
-       struct device *dev;
-       struct pci_bus *root;
-       struct cxl_port *port;
-       int error;
-       int count;
-};
-
 /**
  * struct cxl_port - logical collection of upstream port devices and
  *                  downstream port devices to construct a CXL memory
@@ -242,16 +262,22 @@ struct cxl_walk_context {
  * @uport: PCI or platform device implementing the upstream port capability
  * @id: id for port device-name
  * @dports: cxl_dport instances referenced by decoders
+ * @endpoints: cxl_ep instances, endpoints that are a descendant of this port
  * @decoder_ida: allocator for decoder ids
  * @component_reg_phys: component register capability base address (optional)
+ * @dead: last ep has been removed, force port re-creation
+ * @depth: How deep this port is relative to the root. depth 0 is the root.
  */
 struct cxl_port {
        struct device dev;
        struct device *uport;
        int id;
        struct list_head dports;
+       struct list_head endpoints;
        struct ida decoder_ida;
        resource_size_t component_reg_phys;
+       bool dead;
+       unsigned int depth;
 };
 
 /**
@@ -270,19 +296,65 @@ struct cxl_dport {
        struct list_head list;
 };
 
+/**
+ * struct cxl_ep - track an endpoint's interest in a port
+ * @ep: device that hosts a generic CXL endpoint (expander or accelerator)
+ * @list: node on port->endpoints list
+ */
+struct cxl_ep {
+       struct device *ep;
+       struct list_head list;
+};
+
+/*
+ * The platform firmware device hosting the root is also the top of the
+ * CXL port topology. All other CXL ports have another CXL port as their
+ * parent, and their ->uport / host device lies outside of the port
+ * ancestry.
+ */
+static inline bool is_cxl_root(struct cxl_port *port)
+{
+       return port->uport == port->dev.parent;
+}
+
+bool is_cxl_port(struct device *dev);
 struct cxl_port *to_cxl_port(struct device *dev);
+struct pci_bus;
+int devm_cxl_register_pci_bus(struct device *host, struct device *uport,
+                             struct pci_bus *bus);
+struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
 struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
                                   resource_size_t component_reg_phys,
                                   struct cxl_port *parent_port);
-
-int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
-                 resource_size_t component_reg_phys);
+struct cxl_port *find_cxl_root(struct device *dev);
+int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
+int cxl_bus_rescan(void);
+struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd);
+bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
+
+struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
+                                    struct device *dport, int port_id,
+                                    resource_size_t component_reg_phys);
+struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port,
+                                       const struct device *dev);
 
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 bool is_root_decoder(struct device *dev);
-struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets);
+bool is_cxl_decoder(struct device *dev);
+struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
+                                          unsigned int nr_targets);
+struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
+                                            unsigned int nr_targets);
 int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
+struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port);
+int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
 int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
+int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
+
+struct cxl_hdm;
+struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port);
+int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm);
+int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
 
 extern struct bus_type cxl_bus_type;
 
@@ -304,8 +376,14 @@ int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
 #define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME)
 void cxl_driver_unregister(struct cxl_driver *cxl_drv);
 
+#define module_cxl_driver(__cxl_driver) \
+       module_driver(__cxl_driver, cxl_driver_register, cxl_driver_unregister)
+
 #define CXL_DEVICE_NVDIMM_BRIDGE       1
 #define CXL_DEVICE_NVDIMM              2
+#define CXL_DEVICE_PORT                        3
+#define CXL_DEVICE_ROOT                        4
+#define CXL_DEVICE_MEMORY_EXPANDER     5
 
 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
 #define CXL_MODALIAS_FMT "cxl:t%d"
@@ -326,4 +404,83 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd);
 #ifndef __mock
 #define __mock static
 #endif
+
+#ifdef CONFIG_PROVE_CXL_LOCKING
+enum cxl_lock_class {
+       CXL_ANON_LOCK,
+       CXL_NVDIMM_LOCK,
+       CXL_NVDIMM_BRIDGE_LOCK,
+       CXL_PORT_LOCK,
+       /*
+        * Be careful when adding new lock classes here: CXL_PORT_LOCK is
+        * extended by the port depth, so a maximum CXL port topology
+        * depth would need to be defined first.
+        */
+};
+
+static inline void cxl_nested_lock(struct device *dev)
+{
+       if (is_cxl_port(dev)) {
+               struct cxl_port *port = to_cxl_port(dev);
+
+               mutex_lock_nested(&dev->lockdep_mutex,
+                                 CXL_PORT_LOCK + port->depth);
+       } else if (is_cxl_decoder(dev)) {
+               struct cxl_port *port = to_cxl_port(dev->parent);
+
+               /*
+                * A decoder is the immediate child of a port, so set
+                * its lock class equal to other child device siblings.
+                */
+               mutex_lock_nested(&dev->lockdep_mutex,
+                                 CXL_PORT_LOCK + port->depth + 1);
+       } else if (is_cxl_nvdimm_bridge(dev))
+               mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK);
+       else if (is_cxl_nvdimm(dev))
+               mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK);
+       else
+               mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK);
+}
+
+static inline void cxl_nested_unlock(struct device *dev)
+{
+       mutex_unlock(&dev->lockdep_mutex);
+}
+
+static inline void cxl_device_lock(struct device *dev)
+{
+       /*
+        * For double-lock errors the lockup would happen before lockdep
+        * could warn at cxl_nested_lock(), so assert explicitly.
+        */
+       lockdep_assert_not_held(&dev->lockdep_mutex);
+
+       device_lock(dev);
+       cxl_nested_lock(dev);
+}
+
+static inline void cxl_device_unlock(struct device *dev)
+{
+       cxl_nested_unlock(dev);
+       device_unlock(dev);
+}
+#else
+static inline void cxl_nested_lock(struct device *dev)
+{
+}
+
+static inline void cxl_nested_unlock(struct device *dev)
+{
+}
+
+static inline void cxl_device_lock(struct device *dev)
+{
+       device_lock(dev);
+}
+
+static inline void cxl_device_unlock(struct device *dev)
+{
+       device_unlock(dev);
+}
+#endif
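
Taken together these helpers let lockdep validate parent-before-child
acquisition order. A sketch of the intended nesting when descending from a
port to one of its child decoders (assumes the caller holds references on
both devices):

    cxl_device_lock(&port->dev);    /* class CXL_PORT_LOCK + port->depth */
    cxl_device_lock(&cxld->dev);    /* child class: port->depth + 1 */

    /* ... operate on the decoder with both objects held stable ... */

    cxl_device_unlock(&cxld->dev);
    cxl_device_unlock(&port->dev);
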
 #endif /* __CXL_H__ */
index 8d96d00..5d33ce2 100644 (file)
  * @dev: driver core device object
  * @cdev: char dev core object for ioctl operations
  * @cxlds: The device state backing this device
+ * @detach_work: active memdev lost a port in its ancestry
  * @id: id number of this memdev instance.
  */
 struct cxl_memdev {
        struct device dev;
        struct cdev cdev;
        struct cxl_dev_state *cxlds;
+       struct work_struct detach_work;
        int id;
 };
 
@@ -48,6 +50,12 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
        return container_of(dev, struct cxl_memdev, dev);
 }
 
+bool is_cxl_memdev(struct device *dev);
+static inline bool is_cxl_endpoint(struct cxl_port *port)
+{
+       return is_cxl_memdev(port->uport);
+}
+
 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
 
 /**
@@ -89,6 +97,18 @@ struct cxl_mbox_cmd {
  */
 #define CXL_CAPACITY_MULTIPLIER SZ_256M
 
+/**
+ * struct cxl_endpoint_dvsec_info - Cached DVSEC info
+ * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
+ * @ranges: Number of active HDM ranges this device uses.
+ * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
+ */
+struct cxl_endpoint_dvsec_info {
+       bool mem_enabled;
+       int ranges;
+       struct range dvsec_range[2];
+};
+
 /**
  * struct cxl_dev_state - The driver device state
  *
@@ -98,6 +118,7 @@ struct cxl_mbox_cmd {
  *
  * @dev: The device associated with this CXL state
  * @regs: Parsed register blocks
+ * @cxl_dvsec: Offset to the PCIe device DVSEC
  * @payload_size: Size of space for payload
  *                (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
  * @lsa_size: Size of Label Storage Area
@@ -116,7 +137,11 @@ struct cxl_mbox_cmd {
  * @active_persistent_bytes: sum of hard + soft persistent
  * @next_volatile_bytes: volatile capacity change pending device reset
  * @next_persistent_bytes: persistent capacity change pending device reset
+ * @component_reg_phys: register base of component registers
+ * @info: Cached DVSEC information about the device.
+ * @serial: PCIe Device Serial Number
  * @mbox_send: @dev specific transport for transmitting mailbox commands
+ * @wait_media_ready: @dev specific method to await media ready
  *
  * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
  * details on capacity parameters.
@@ -125,6 +150,7 @@ struct cxl_dev_state {
        struct device *dev;
 
        struct cxl_regs regs;
+       int cxl_dvsec;
 
        size_t payload_size;
        size_t lsa_size;
@@ -145,7 +171,12 @@ struct cxl_dev_state {
        u64 next_volatile_bytes;
        u64 next_persistent_bytes;
 
+       resource_size_t component_reg_phys;
+       struct cxl_endpoint_dvsec_info info;
+       u64 serial;
+
        int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
+       int (*wait_media_ready)(struct cxl_dev_state *cxlds);
 };
 
 enum cxl_opcode {
@@ -264,4 +295,12 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
 struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
 void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
+
+struct cxl_hdm {
+       struct cxl_component_regs regs;
+       unsigned int decoder_count;
+       unsigned int target_count;
+       unsigned int interleave_mask;
+       struct cxl_port *port;
+};
 #endif /* __CXL_MEM_H__ */
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
new file mode 100644 (file)
index 0000000..329e7ea
--- /dev/null
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#ifndef __CXL_PCI_H__
+#define __CXL_PCI_H__
+#include <linux/pci.h>
+#include "cxl.h"
+
+#define CXL_MEMORY_PROGIF      0x10
+
+/*
+ * See section 8.1 Configuration Space Registers in the CXL 2.0
+ * Specification. Names are taken straight from the specification with "CXL" and
+ * "DVSEC" redundancies removed. When obvious, abbreviations may be used.
+ */
+#define PCI_DVSEC_HEADER1_LENGTH_MASK  GENMASK(31, 20)
+#define PCI_DVSEC_VENDOR_ID_CXL                0x1E98
+
+/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */
+#define CXL_DVSEC_PCIE_DEVICE                                  0
+#define   CXL_DVSEC_CAP_OFFSET         0xA
+#define     CXL_DVSEC_MEM_CAPABLE      BIT(2)
+#define     CXL_DVSEC_HDM_COUNT_MASK   GENMASK(5, 4)
+#define   CXL_DVSEC_CTRL_OFFSET                0xC
+#define     CXL_DVSEC_MEM_ENABLE       BIT(2)
+#define   CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10))
+#define   CXL_DVSEC_RANGE_SIZE_LOW(i)  (0x1C + (i * 0x10))
+#define     CXL_DVSEC_MEM_INFO_VALID   BIT(0)
+#define     CXL_DVSEC_MEM_ACTIVE       BIT(1)
+#define     CXL_DVSEC_MEM_SIZE_LOW_MASK        GENMASK(31, 28)
+#define   CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10))
+#define   CXL_DVSEC_RANGE_BASE_LOW(i)  (0x24 + (i * 0x10))
+#define     CXL_DVSEC_MEM_BASE_LOW_MASK        GENMASK(31, 28)
+
+/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */
+#define CXL_DVSEC_FUNCTION_MAP                                 2
+
+/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */
+#define CXL_DVSEC_PORT_EXTENSIONS                              3
+
+/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */
+#define CXL_DVSEC_PORT_GPF                                     4
+
+/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */
+#define CXL_DVSEC_DEVICE_GPF                                   5
+
+/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */
+#define CXL_DVSEC_PCIE_FLEXBUS_PORT                            7
+
+/* CXL 2.0 8.1.9: Register Locator DVSEC */
+#define CXL_DVSEC_REG_LOCATOR                                  8
+#define   CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET                  0xC
+#define     CXL_DVSEC_REG_LOCATOR_BIR_MASK                     GENMASK(2, 0)
+#define     CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK                GENMASK(15, 8)
+#define     CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK           GENMASK(31, 16)
+
+/* Register Block Identifier (RBI) */
+enum cxl_regloc_type {
+       CXL_REGLOC_RBI_EMPTY = 0,
+       CXL_REGLOC_RBI_COMPONENT,
+       CXL_REGLOC_RBI_VIRT,
+       CXL_REGLOC_RBI_MEMDEV,
+       CXL_REGLOC_RBI_TYPES
+};
+
+static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
+                                                struct cxl_register_map *map)
+{
+       if (map->block_offset == U64_MAX)
+               return CXL_RESOURCE_NONE;
+
+       return pci_resource_start(pdev, map->barno) + map->block_offset;
+}
+
+int devm_cxl_port_enumerate_dports(struct cxl_port *port);
+#endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
new file mode 100644 (file)
index 0000000..49a4b1c
--- /dev/null
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "cxlmem.h"
+#include "cxlpci.h"
+
+/**
+ * DOC: cxl mem
+ *
+ * CXL memory endpoint devices and switches are CXL capable devices that are
+ * participating in the CXL.mem protocol. Their functionality builds on top of the
+ * CXL.io protocol that allows enumerating and configuring components via
+ * standard PCI mechanisms.
+ *
+ * The cxl_mem driver initiates the enumeration of this CXL.mem
+ * capability. With the detection of a CXL capable endpoint, the driver will
+ * walk up to find the platform specific port it is connected to, and determine
+ * if there are intervening switches in the path. If there are switches, a
+ * secondary action is to enumerate those (implemented in cxl_core). Finally the
+ * cxl_mem driver adds the device it is bound to as a CXL endpoint-port for use
+ * in higher level operations.
+ */
+
+static int wait_for_media(struct cxl_memdev *cxlmd)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+       int rc;
+
+       if (!info->mem_enabled)
+               return -EBUSY;
+
+       rc = cxlds->wait_media_ready(cxlds);
+       if (rc)
+               return rc;
+
+       /*
+        * The device is now known to be active and enabled, but if any
+        * DVSEC ranges are non-zero they will need to be checked later,
+        * before adding the port, since the port owns the HDM decoder
+        * registers.
+        */
+       return 0;
+}
+
+static int create_endpoint(struct cxl_memdev *cxlmd,
+                          struct cxl_port *parent_port)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_port *endpoint;
+
+       endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
+                                    cxlds->component_reg_phys, parent_port);
+       if (IS_ERR(endpoint))
+               return PTR_ERR(endpoint);
+
+       dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));
+
+       if (!endpoint->dev.driver) {
+               dev_err(&cxlmd->dev, "%s failed probe\n",
+                       dev_name(&endpoint->dev));
+               return -ENXIO;
+       }
+
+       return cxl_endpoint_autoremove(cxlmd, endpoint);
+}
+
+/**
+ * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint
+ * @cxlds: Device state
+ *
+ * Additionally, enables global HDM decoding. Warning: don't call this outside
+ * of probe. Once probe is complete, the port driver owns all access to the HDM
+ * decoder registers.
+ *
+ * Returns: false if DVSEC Ranges are being used instead of HDM
+ * decoders, or if it cannot be determined whether DVSEC Ranges are in use.
+ * Otherwise, returns true.
+ */
+__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
+{
+       struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+       struct cxl_register_map map;
+       struct cxl_component_reg_map *cmap = &map.component_map;
+       bool global_enable, do_hdm_init = false;
+       void __iomem *crb;
+       u32 global_ctrl;
+
+       /* map hdm decoder */
+       crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
+       if (!crb) {
+               dev_dbg(cxlds->dev, "Failed to map component registers\n");
+               return false;
+       }
+
+       cxl_probe_component_regs(cxlds->dev, crb, cmap);
+       if (!cmap->hdm_decoder.valid) {
+               dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n");
+               goto out;
+       }
+
+       global_ctrl = readl(crb + cmap->hdm_decoder.offset +
+                           CXL_HDM_DECODER_CTRL_OFFSET);
+       global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE;
+       if (!global_enable && info->ranges) {
+               dev_dbg(cxlds->dev,
+                       "DVSEC ranges already programmed and HDM decoders not enabled.\n");
+               goto out;
+       }
+
+       do_hdm_init = true;
+
+       /*
+        * Permanently (for this boot at least) opt the device into HDM
+        * operation. Individual HDM decoders still need to be enabled after
+        * this point.
+        */
+       if (!global_enable) {
+               dev_dbg(cxlds->dev, "Enabling HDM decode\n");
+               writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
+                      crb + cmap->hdm_decoder.offset +
+                              CXL_HDM_DECODER_CTRL_OFFSET);
+       }
+
+out:
+       iounmap(crb);
+       return do_hdm_init;
+}
+
+static int cxl_mem_probe(struct device *dev)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_port *parent_port;
+       int rc;
+
+       /*
+        * Someone is trying to reattach this device after it lost its port
+        * connection (an endpoint port previously registered by this memdev was
+        * disabled). This racy check is ok because if the port is still gone,
+        * no harm done, and if the port hierarchy comes back it will re-trigger
+        * this probe. Port rescan and memdev detach work share the same
+        * single-threaded workqueue.
+        */
+       if (work_pending(&cxlmd->detach_work))
+               return -EBUSY;
+
+       rc = wait_for_media(cxlmd);
+       if (rc) {
+               dev_err(dev, "Media not active (%d)\n", rc);
+               return rc;
+       }
+
+       /*
+        * If DVSEC ranges are being used instead of HDM decoder registers there
+        * is no use in trying to manage those.
+        */
+       if (!cxl_dvsec_decode_init(cxlds)) {
+               struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+               int i;
+
+               /* sanity check the two potential DVSEC ranges */
+               for (i = 0; i < 2; i++) {
+                       u64 base, size;
+
+                       /*
+                        * Give a nice warning to the user that BIOS has really
+                        * botched things for them if it didn't place DVSEC
+                        * ranges in the memory map.
+                        */
+                       base = info->dvsec_range[i].start;
+                       size = range_len(&info->dvsec_range[i]);
+                       if (size && !region_intersects(base, size,
+                                                      IORESOURCE_SYSTEM_RAM,
+                                                      IORES_DESC_NONE)) {
+                               dev_err(dev,
+                                       "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n",
+                                       base, base + size - 1);
+                       }
+               }
+               dev_err(dev,
+                       "Active DVSEC range registers in use. Will not bind.\n");
+               return -EBUSY;
+       }
+
+       rc = devm_cxl_enumerate_ports(cxlmd);
+       if (rc)
+               return rc;
+
+       parent_port = cxl_mem_find_port(cxlmd);
+       if (!parent_port) {
+               dev_err(dev, "CXL port topology not found\n");
+               return -ENXIO;
+       }
+
+       cxl_device_lock(&parent_port->dev);
+       if (!parent_port->dev.driver) {
+               dev_err(dev, "CXL port topology %s not enabled\n",
+                       dev_name(&parent_port->dev));
+               rc = -ENXIO;
+               goto out;
+       }
+
+       rc = create_endpoint(cxlmd, parent_port);
+out:
+       cxl_device_unlock(&parent_port->dev);
+       put_device(&parent_port->dev);
+       return rc;
+}
+
+static struct cxl_driver cxl_mem_driver = {
+       .name = "cxl_mem",
+       .probe = cxl_mem_probe,
+       .id = CXL_DEVICE_MEMORY_EXPANDER,
+};
+
+module_cxl_driver(cxl_mem_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(CXL);
+MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER);
+/*
+ * create_endpoint() wants to validate port driver attach immediately after
+ * endpoint registration.
+ */
+MODULE_SOFTDEP("pre: cxl_port");
index 8dc91fd..8a7267d 100644 (file)
@@ -1,14 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/moduleparam.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/pci.h>
 #include <linux/io.h>
 #include "cxlmem.h"
-#include "pci.h"
+#include "cxlpci.h"
 #include "cxl.h"
 
 /**
 /* CXL 2.0 - 8.2.8.4 */
 #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
 
+/*
+ * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
+ * dictate how long to wait for the mailbox to become ready. The new
+ * field allows the device to tell software the amount of time to wait
+ * before mailbox ready. This field per the spec theoretically allows
+ * for up to 255 seconds. 255 seconds is unreasonably long; it's longer
+ * than the maximum SATA port link recovery wait. Default to 60 seconds
+ * until someone builds a CXL device that needs more time in practice.
+ */
+static unsigned short mbox_ready_timeout = 60;
+module_param(mbox_ready_timeout, ushort, 0644);
+MODULE_PARM_DESC(mbox_ready_timeout,
+                "seconds to wait for mailbox ready / memory active status");
+
 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 {
        const unsigned long start = jiffies;
@@ -57,14 +73,16 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
        return 0;
 }
 
-static void cxl_pci_mbox_timeout(struct cxl_dev_state *cxlds,
-                                struct cxl_mbox_cmd *mbox_cmd)
-{
-       struct device *dev = cxlds->dev;
+#define cxl_err(dev, status, msg)                                        \
+       dev_err_ratelimited(dev, msg ", device state %s%s\n",                  \
+                           status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
+                           status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 
-       dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
-               mbox_cmd->opcode, mbox_cmd->size_in);
-}
+#define cxl_cmd_err(dev, cmd, status, msg)                               \
+       dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n",    \
+                           (cmd)->opcode,                                     \
+                           status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
+                           status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 
 /**
  * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
@@ -118,7 +136,11 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 
        /* #1 */
        if (cxl_doorbell_busy(cxlds)) {
-               dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
+               u64 md_status =
+                       readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+
+               cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
+                           "mailbox queue busy");
                return -EBUSY;
        }
 
@@ -144,7 +166,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
        /* #5 */
        rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
        if (rc == -ETIMEDOUT) {
-               cxl_pci_mbox_timeout(cxlds, mbox_cmd);
+               u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+
+               cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
                return rc;
        }
 
@@ -182,98 +206,13 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
        return 0;
 }
 
-/**
- * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
- * @cxlds: The device state to gain access to.
- *
- * Context: Any context. Takes the mbox_mutex.
- * Return: 0 if exclusive access was acquired.
- */
-static int cxl_pci_mbox_get(struct cxl_dev_state *cxlds)
-{
-       struct device *dev = cxlds->dev;
-       u64 md_status;
-       int rc;
-
-       mutex_lock_io(&cxlds->mbox_mutex);
-
-       /*
-        * XXX: There is some amount of ambiguity in the 2.0 version of the spec
-        * around the mailbox interface ready (8.2.8.5.1.1).  The purpose of the
-        * bit is to allow firmware running on the device to notify the driver
-        * that it's ready to receive commands. It is unclear if the bit needs
-        * to be read for each transaction mailbox, ie. the firmware can switch
-        * it on and off as needed. Second, there is no defined timeout for
-        * mailbox ready, like there is for the doorbell interface.
-        *
-        * Assumptions:
-        * 1. The firmware might toggle the Mailbox Interface Ready bit, check
-        *    it for every command.
-        *
-        * 2. If the doorbell is clear, the firmware should have first set the
-        *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
-        *    to be ready is sufficient.
-        */
-       rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
-       if (rc) {
-               dev_warn(dev, "Mailbox interface not ready\n");
-               goto out;
-       }
-
-       md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
-       if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
-               dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
-               rc = -EBUSY;
-               goto out;
-       }
-
-       /*
-        * Hardware shouldn't allow a ready status but also have failure bits
-        * set. Spit out an error, this should be a bug report
-        */
-       rc = -EFAULT;
-       if (md_status & CXLMDEV_DEV_FATAL) {
-               dev_err(dev, "mbox: reported ready, but fatal\n");
-               goto out;
-       }
-       if (md_status & CXLMDEV_FW_HALT) {
-               dev_err(dev, "mbox: reported ready, but halted\n");
-               goto out;
-       }
-       if (CXLMDEV_RESET_NEEDED(md_status)) {
-               dev_err(dev, "mbox: reported ready, but reset needed\n");
-               goto out;
-       }
-
-       /* with lock held */
-       return 0;
-
-out:
-       mutex_unlock(&cxlds->mbox_mutex);
-       return rc;
-}
-
-/**
- * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
- * @cxlds: The device state to communicate with.
- *
- * Context: Any context. Expects mbox_mutex to be held.
- */
-static void cxl_pci_mbox_put(struct cxl_dev_state *cxlds)
-{
-       mutex_unlock(&cxlds->mbox_mutex);
-}
-
 static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
 {
        int rc;
 
-       rc = cxl_pci_mbox_get(cxlds);
-       if (rc)
-               return rc;
-
+       mutex_lock_io(&cxlds->mbox_mutex);
        rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
-       cxl_pci_mbox_put(cxlds);
+       mutex_unlock(&cxlds->mbox_mutex);
 
        return rc;
 }
@@ -281,6 +220,34 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
 static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 {
        const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+       unsigned long timeout;
+       u64 md_status;
+
+       timeout = jiffies + mbox_ready_timeout * HZ;
+       do {
+               md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+               if (md_status & CXLMDEV_MBOX_IF_READY)
+                       break;
+               if (msleep_interruptible(100))
+                       break;
+       } while (!time_after(jiffies, timeout));
+
+       if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
+               cxl_err(cxlds->dev, md_status,
+                       "timeout awaiting mailbox ready");
+               return -ETIMEDOUT;
+       }
+
+       /*
+        * A command may be in flight from a previous driver instance,
+        * think kexec, do one doorbell wait so that
+        * __cxl_pci_mbox_send_cmd() can assume that it is the only
+        * source for future doorbell busy events.
+        */
+       if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
+               cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
+               return -ETIMEDOUT;
+       }
 
        cxlds->mbox_send = cxl_pci_mbox_send;
        cxlds->payload_size =
@@ -400,75 +367,182 @@ static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *ma
        return 0;
 }
 
-static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
-                               struct cxl_register_map *map)
+static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
+                         struct cxl_register_map *map)
 {
-       map->block_offset =
-               ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
-       map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
-       map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
+       int rc;
+
+       rc = cxl_find_regblock(pdev, type, map);
+       if (rc)
+               return rc;
+
+       rc = cxl_map_regblock(pdev, map);
+       if (rc)
+               return rc;
+
+       rc = cxl_probe_regs(pdev, map);
+       cxl_unmap_regblock(pdev, map);
+
+       return rc;
 }
 
-/**
- * cxl_find_regblock() - Locate register blocks by type
- * @pdev: The CXL PCI device to enumerate.
- * @type: Register Block Indicator id
- * @map: Enumeration output, clobbered on error
- *
- * Return: 0 if register block enumerated, negative error code otherwise
- *
- * A CXL DVSEC may point to one or more register blocks, search for them
- * by @type.
- */
-static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
-                            struct cxl_register_map *map)
+static int wait_for_valid(struct cxl_dev_state *cxlds)
 {
-       u32 regloc_size, regblocks;
-       int regloc, i;
+       struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+       int d = cxlds->cxl_dvsec, rc;
+       u32 val;
 
-       regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
-                                          PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
-       if (!regloc)
-               return -ENXIO;
+       /*
+        * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
+        * and Size Low registers are valid. Must be set within 1 second of
+        * deassertion of reset to CXL device. Likely it is already set by the
+        * time this runs, but otherwise give a 1.5 second timeout in case of
+        * clock skew.
+        */
+       rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+       if (rc)
+               return rc;
+
+       if (val & CXL_DVSEC_MEM_INFO_VALID)
+               return 0;
+
+       msleep(1500);
+
+       rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+       if (rc)
+               return rc;
 
-       pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
-       regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
+       if (val & CXL_DVSEC_MEM_INFO_VALID)
+               return 0;
 
-       regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
-       regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
+       return -ETIMEDOUT;
+}
+
+/*
+ * Wait up to @mbox_ready_timeout for the device to report memory
+ * active.
+ */
+static int wait_for_media_ready(struct cxl_dev_state *cxlds)
+{
+       struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+       int d = cxlds->cxl_dvsec;
+       bool active = false;
+       u64 md_status;
+       int rc, i;
+
+       rc = wait_for_valid(cxlds);
+       if (rc)
+               return rc;
 
-       for (i = 0; i < regblocks; i++, regloc += 8) {
-               u32 reg_lo, reg_hi;
+       for (i = mbox_ready_timeout; i; i--) {
+               u32 temp;
+               int rc;
 
-               pci_read_config_dword(pdev, regloc, &reg_lo);
-               pci_read_config_dword(pdev, regloc + 4, &reg_hi);
+               rc = pci_read_config_dword(
+                       pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
+               if (rc)
+                       return rc;
 
-               cxl_decode_regblock(reg_lo, reg_hi, map);
+               active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
+               if (active)
+                       break;
+               msleep(1000);
+       }
 
-               if (map->reg_type == type)
-                       return 0;
+       if (!active) {
+               dev_err(&pdev->dev,
+                       "timeout awaiting memory active after %d seconds\n",
+                       mbox_ready_timeout);
+               return -ETIMEDOUT;
        }
 
-       return -ENODEV;
+       md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+       if (!CXLMDEV_READY(md_status))
+               return -EIO;
+
+       return 0;
 }
 
-static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
-                         struct cxl_register_map *map)
+static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds)
 {
-       int rc;
+       struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+       struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+       int d = cxlds->cxl_dvsec;
+       int hdm_count, rc, i;
+       u16 cap, ctrl;
 
-       rc = cxl_find_regblock(pdev, type, map);
+       if (!d)
+               return -ENXIO;
+
+       rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
        if (rc)
                return rc;
 
-       rc = cxl_map_regblock(pdev, map);
+       rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
        if (rc)
                return rc;
 
-       rc = cxl_probe_regs(pdev, map);
-       cxl_unmap_regblock(pdev, map);
+       if (!(cap & CXL_DVSEC_MEM_CAPABLE))
+               return -ENXIO;
 
-       return rc;
+       /*
+        * It is not allowed by the spec for MEM.capable to be set and have 0 legacy
+        * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
+        * driver is for a spec defined class code which must be CXL.mem
+        * capable, there is no point in continuing to enable CXL.mem.
+        */
+       hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+       if (!hdm_count || hdm_count > 2)
+               return -EINVAL;
+
+       rc = wait_for_valid(cxlds);
+       if (rc)
+               return rc;
+
+       info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
+
+       for (i = 0; i < hdm_count; i++) {
+               u64 base, size;
+               u32 temp;
+
+               rc = pci_read_config_dword(
+                       pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
+               if (rc)
+                       return rc;
+
+               size = (u64)temp << 32;
+
+               rc = pci_read_config_dword(
+                       pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
+               if (rc)
+                       return rc;
+
+               size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
+
+               rc = pci_read_config_dword(
+                       pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
+               if (rc)
+                       return rc;
+
+               base = (u64)temp << 32;
+
+               rc = pci_read_config_dword(
+                       pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
+               if (rc)
+                       return rc;
+
+               base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
+
+               info->dvsec_range[i] = (struct range) {
+                       .start = base,
+                       .end = base + size - 1
+               };
+
+               if (size)
+                       info->ranges++;
+       }
+
+       return 0;
 }
 
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -493,6 +567,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (IS_ERR(cxlds))
                return PTR_ERR(cxlds);
 
+       cxlds->serial = pci_get_dsn(pdev);
+       cxlds->cxl_dvsec = pci_find_dvsec_capability(
+               pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
+       if (!cxlds->cxl_dvsec)
+               dev_warn(&pdev->dev,
+                        "Device DVSEC not present, skip CXL.mem init\n");
+
+       cxlds->wait_media_ready = wait_for_media_ready;
+
        rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
        if (rc)
                return rc;
@@ -501,6 +584,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (rc)
                return rc;
 
+       /*
+        * If the component registers can't be found, the cxl_pci driver may
+        * still be useful for management functions so don't return an error.
+        */
+       cxlds->component_reg_phys = CXL_RESOURCE_NONE;
+       rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+       if (rc)
+               dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
+
+       cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
+
        rc = cxl_pci_setup_mailbox(cxlds);
        if (rc)
                return rc;
@@ -517,6 +611,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (rc)
                return rc;
 
+       rc = cxl_dvsec_ranges(cxlds);
+       if (rc)
+               dev_warn(&pdev->dev,
+                        "Failed to get DVSEC range information (%d)\n", rc);
+
        cxlmd = devm_cxl_add_memdev(cxlds);
        if (IS_ERR(cxlmd))
                return PTR_ERR(cxlmd);
diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
deleted file mode 100644 (file)
index 7d3e4bf..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#ifndef __CXL_PCI_H__
-#define __CXL_PCI_H__
-
-#define CXL_MEMORY_PROGIF      0x10
-
-/*
- * See section 8.1 Configuration Space Registers in the CXL 2.0
- * Specification
- */
-#define PCI_DVSEC_HEADER1_LENGTH_MASK  GENMASK(31, 20)
-#define PCI_DVSEC_VENDOR_ID_CXL                0x1E98
-#define PCI_DVSEC_ID_CXL               0x0
-
-#define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID       0x8
-#define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET  0xC
-
-/* BAR Indicator Register (BIR) */
-#define CXL_REGLOC_BIR_MASK GENMASK(2, 0)
-
-/* Register Block Identifier (RBI) */
-enum cxl_regloc_type {
-       CXL_REGLOC_RBI_EMPTY = 0,
-       CXL_REGLOC_RBI_COMPONENT,
-       CXL_REGLOC_RBI_VIRT,
-       CXL_REGLOC_RBI_MEMDEV,
-       CXL_REGLOC_RBI_TYPES
-};
-
-#define CXL_REGLOC_RBI_MASK GENMASK(15, 8)
-#define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
-
-#endif /* __CXL_PCI_H__ */
index b65a272..15ad666 100644 (file)
@@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev)
        if (!cxl_nvb)
                return -ENXIO;
 
-       device_lock(&cxl_nvb->dev);
+       cxl_device_lock(&cxl_nvb->dev);
        if (!cxl_nvb->nvdimm_bus) {
                rc = -ENXIO;
                goto out;
@@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev)
        dev_set_drvdata(dev, nvdimm);
        rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
 out:
-       device_unlock(&cxl_nvb->dev);
+       cxl_device_unlock(&cxl_nvb->dev);
        put_device(&cxl_nvb->dev);
 
        return rc;
@@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work)
        struct nvdimm_bus *victim_bus = NULL;
        bool release = false, rescan = false;
 
-       device_lock(&cxl_nvb->dev);
+       cxl_device_lock(&cxl_nvb->dev);
        switch (cxl_nvb->state) {
        case CXL_NVB_ONLINE:
                if (!online_nvdimm_bus(cxl_nvb)) {
@@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work)
        default:
                break;
        }
-       device_unlock(&cxl_nvb->dev);
+       cxl_device_unlock(&cxl_nvb->dev);
 
        if (release)
                device_release_driver(&cxl_nvb->dev);
@@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
                return 0;
 
        cxl_nvb = to_cxl_nvdimm_bridge(dev);
-       device_lock(dev);
+       cxl_device_lock(dev);
        cxl_nvb->state = CXL_NVB_NEW;
-       device_unlock(dev);
+       cxl_device_unlock(dev);
 
        return 0;
 }
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
new file mode 100644 (file)
index 0000000..d420da5
--- /dev/null
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "cxlmem.h"
+#include "cxlpci.h"
+
+/**
+ * DOC: cxl port
+ *
+ * The port driver enumerates dports via PCI and scans for HDM
+ * (Host-managed-Device-Memory) decoder resources via the
+ * @component_reg_phys value passed in by the agent that registered the
+ * port. All descendant ports of a CXL root port (described by platform
+ * firmware) are managed in this driver's context. Each driver instance
+ * is responsible for tearing down the driver context of immediate
+ * descendant ports. The locking for this is validated by
+ * CONFIG_PROVE_CXL_LOCKING.
+ *
+ * The primary service this driver provides is presenting APIs to other
+ * drivers to utilize the decoders, and indicating to userspace (via bind
+ * status) the connectivity of the CXL.mem protocol throughout the
+ * PCIe topology.
+ */
+
+static void schedule_detach(void *cxlmd)
+{
+       schedule_cxl_memdev_detach(cxlmd);
+}
+
+static int cxl_port_probe(struct device *dev)
+{
+       struct cxl_port *port = to_cxl_port(dev);
+       struct cxl_hdm *cxlhdm;
+       int rc;
+
+       if (is_cxl_endpoint(port)) {
+               struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
+
+               get_device(&cxlmd->dev);
+               rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd);
+               if (rc)
+                       return rc;
+       } else {
+               rc = devm_cxl_port_enumerate_dports(port);
+               if (rc < 0)
+                       return rc;
+               if (rc == 1)
+                       return devm_cxl_add_passthrough_decoder(port);
+       }
+
+       cxlhdm = devm_cxl_setup_hdm(port);
+       if (IS_ERR(cxlhdm))
+               return PTR_ERR(cxlhdm);
+
+       rc = devm_cxl_enumerate_decoders(cxlhdm);
+       if (rc) {
+               dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
+               return rc;
+       }
+
+       return 0;
+}
+
+static struct cxl_driver cxl_port_driver = {
+       .name = "cxl_port",
+       .probe = cxl_port_probe,
+       .id = CXL_DEVICE_PORT,
+};
+
+module_cxl_driver(cxl_port_driver);
+MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(CXL);
+MODULE_ALIAS_CXL(CXL_DEVICE_PORT);
index a11850d..2650a85 100644 (file)
@@ -185,7 +185,7 @@ static inline void devm_nsio_disable(struct device *dev,
 }
 #endif
 
-#ifdef CONFIG_PROVE_LOCKING
+#ifdef CONFIG_PROVE_NVDIMM_LOCKING
 extern struct class *nd_class;
 
 enum {
index cfe3b09..075cd25 100644 (file)
@@ -1544,6 +1544,29 @@ config CSD_LOCK_WAIT_DEBUG
          include the IPI handler function currently executing (if any)
          and relevant stack traces.
 
+choice
+       prompt "Lock debugging: prove subsystem device_lock() correctness"
+       depends on PROVE_LOCKING
+       help
+         For subsystems that have instrumented their usage of device_lock()
+         with nested annotations, enable lock dependency checking. The locking
+         hierarchy 'subclass' identifiers are not compatible across
+         sub-systems, so only one can be enabled at a time.
+
+config PROVE_NVDIMM_LOCKING
+       bool "NVDIMM"
+       depends on LIBNVDIMM
+       help
+         Enable lockdep to validate nd_device_lock() usage.
+
+config PROVE_CXL_LOCKING
+       bool "CXL"
+       depends on CXL_BUS
+       help
+         Enable lockdep to validate cxl_device_lock() usage.
+
+endchoice
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
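
Both new options feed a lockdep 'subclass' into device_lock() acquisitions so
that parent/child locking order can be proven at runtime. A sketch of the
nesting pattern being validated, with a made-up subclass enum rather than the
exact cxl_device_lock()/nd_device_lock() definitions:

    #include <linux/device.h>
    #include <linux/mutex.h>

    /* one lockdep subclass per level of the object hierarchy (illustrative) */
    enum my_lock_class {
            MY_ANON_LOCK,   /* devices outside the hierarchy */
            MY_PORT_LOCK,   /* ports, which nest under their parents */
    };

    static inline void my_device_lock(struct device *dev, int subclass)
    {
            /*
             * Plain device_lock() takes dev->mutex with no annotation, so
             * lockdep would flag parent-then-child acquisition as recursion
             * on one lock class; an explicit nesting level avoids that and
             * lets lockdep check the intended ordering instead.
             */
            mutex_lock_nested(&dev->mutex, subclass);
    }

Because the subclass numbering is private to each subsystem, the choice block
enforces that only one subsystem's annotations are active at a time.
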
index 1acdf2f..82e49ab 100644
@@ -3,8 +3,11 @@ ldflags-y += --wrap=acpi_table_parse_cedt
 ldflags-y += --wrap=is_acpi_device_node
 ldflags-y += --wrap=acpi_evaluate_integer
 ldflags-y += --wrap=acpi_pci_find_root
-ldflags-y += --wrap=pci_walk_bus
 ldflags-y += --wrap=nvdimm_bus_register
+ldflags-y += --wrap=devm_cxl_port_enumerate_dports
+ldflags-y += --wrap=devm_cxl_setup_hdm
+ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
+ldflags-y += --wrap=devm_cxl_enumerate_decoders
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
@@ -23,15 +26,26 @@ obj-m += cxl_pmem.o
 cxl_pmem-y := $(CXL_SRC)/pmem.o
 cxl_pmem-y += config_check.o
 
+obj-m += cxl_port.o
+
+cxl_port-y := $(CXL_SRC)/port.o
+cxl_port-y += config_check.o
+
+obj-m += cxl_mem.o
+
+cxl_mem-y := $(CXL_SRC)/mem.o
+cxl_mem-y += mock_mem.o
+cxl_mem-y += config_check.o
+
 obj-m += cxl_core.o
 
-cxl_core-y := $(CXL_CORE_SRC)/bus.o
+cxl_core-y := $(CXL_CORE_SRC)/port.o
 cxl_core-y += $(CXL_CORE_SRC)/pmem.o
 cxl_core-y += $(CXL_CORE_SRC)/regs.o
 cxl_core-y += $(CXL_CORE_SRC)/memdev.o
 cxl_core-y += $(CXL_CORE_SRC)/mbox.o
+cxl_core-y += $(CXL_CORE_SRC)/pci.o
+cxl_core-y += $(CXL_CORE_SRC)/hdm.o
 cxl_core-y += config_check.o
 
-cxl_core-y += mock_pmem.o
-
 obj-m += test/
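
The --wrap ldflags above are how cxl_test interposes on the new port-driver
entry points: the linker redirects references to a wrapped symbol foo into
__wrap_foo, while the original stays reachable as __real_foo. A standalone
illustration with a made-up symbol (note that --wrap only rewrites references
resolved at link time, so the caller and the wrapped definition must live in
separate objects):

    /* answer.c */
    int get_answer(void)
    {
            return 42;
    }

    /* main.c -- build: gcc main.c answer.c -Wl,--wrap=get_answer */
    #include <stdio.h>

    int get_answer(void);
    int __real_get_answer(void);    /* bound by the linker to the original */

    int __wrap_get_answer(void)     /* call sites are redirected here */
    {
            printf("intercepted\n");
            return __real_get_answer();
    }

    int main(void)
    {
            printf("%d\n", get_answer());   /* "intercepted" then "42" */
            return 0;
    }

The __wrap_devm_cxl_*() thunks in tools/testing/cxl/test/mock.c below follow
the same shape, except they dispatch to mock ops for cxl_test devices and call
the exported original for everything else.
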
index 4c8a493..55813de 100644
@@ -4,7 +4,6 @@
 #include <linux/platform_device.h>
 #include <linux/device.h>
 #include <linux/acpi.h>
-#include <linux/pci.h>
 #include <cxl.h>
 #include "test/mock.h"
 
@@ -34,76 +33,3 @@ out:
        put_cxl_mock_ops(index);
        return found;
 }
-
-static int match_add_root_port(struct pci_dev *pdev, void *data)
-{
-       struct cxl_walk_context *ctx = data;
-       struct pci_bus *root_bus = ctx->root;
-       struct cxl_port *port = ctx->port;
-       int type = pci_pcie_type(pdev);
-       struct device *dev = ctx->dev;
-       u32 lnkcap, port_num;
-       int rc;
-
-       if (pdev->bus != root_bus)
-               return 0;
-       if (!pci_is_pcie(pdev))
-               return 0;
-       if (type != PCI_EXP_TYPE_ROOT_PORT)
-               return 0;
-       if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
-                                 &lnkcap) != PCIBIOS_SUCCESSFUL)
-               return 0;
-
-       /* TODO walk DVSEC to find component register base */
-       port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-       rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE);
-       if (rc) {
-               dev_err(dev, "failed to add dport: %s (%d)\n",
-                       dev_name(&pdev->dev), rc);
-               ctx->error = rc;
-               return rc;
-       }
-       ctx->count++;
-
-       dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
-
-       return 0;
-}
-
-static int mock_add_root_port(struct platform_device *pdev, void *data)
-{
-       struct cxl_walk_context *ctx = data;
-       struct cxl_port *port = ctx->port;
-       struct device *dev = ctx->dev;
-       int rc;
-
-       rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE);
-       if (rc) {
-               dev_err(dev, "failed to add dport: %s (%d)\n",
-                       dev_name(&pdev->dev), rc);
-               ctx->error = rc;
-               return rc;
-       }
-       ctx->count++;
-
-       dev_dbg(dev, "add dport%d: %s\n", pdev->id, dev_name(&pdev->dev));
-
-       return 0;
-}
-
-int match_add_root_ports(struct pci_dev *dev, void *data)
-{
-       int index, rc;
-       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-       struct platform_device *pdev = (struct platform_device *) dev;
-
-       if (ops && ops->is_mock_port(pdev))
-               rc = mock_add_root_port(pdev, data);
-       else
-               rc = match_add_root_port(dev, data);
-
-       put_cxl_mock_ops(index);
-
-       return rc;
-}
diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c
new file mode 100644
index 0000000..d1dec58
--- /dev/null
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include <linux/types.h>
+
+struct cxl_dev_state;
+bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
+{
+       return true;
+}
diff --git a/tools/testing/cxl/mock_pmem.c b/tools/testing/cxl/mock_pmem.c
deleted file mode 100644
index f7315e6..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
-#include <cxl.h>
-#include "test/mock.h"
-#include <core/core.h>
-
-int match_nvdimm_bridge(struct device *dev, const void *data)
-{
-       int index, rc = 0;
-       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-       const struct cxl_nvdimm *cxl_nvd = data;
-
-       if (ops) {
-               if (dev->type == &cxl_nvdimm_bridge_type &&
-                   (ops->is_mock_dev(dev->parent->parent) ==
-                    ops->is_mock_dev(cxl_nvd->dev.parent->parent)))
-                       rc = 1;
-       } else
-               rc = dev->type == &cxl_nvdimm_bridge_type;
-
-       put_cxl_mock_ops(index);
-
-       return rc;
-}
index 736d990..431f2bd 100644
@@ -8,16 +8,25 @@
 #include <linux/acpi.h>
 #include <linux/pci.h>
 #include <linux/mm.h>
+#include <cxlmem.h>
 #include "mock.h"
 
-#define NR_CXL_HOST_BRIDGES 4
+#define NR_CXL_HOST_BRIDGES 2
 #define NR_CXL_ROOT_PORTS 2
+#define NR_CXL_SWITCH_PORTS 2
+#define NR_CXL_PORT_DECODERS 2
 
 static struct platform_device *cxl_acpi;
 static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
 static struct platform_device
        *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS];
-struct platform_device *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS];
+static struct platform_device
+       *cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS];
+static struct platform_device
+       *cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS *
+                         NR_CXL_SWITCH_PORTS];
+struct platform_device
+       *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS];
 
 static struct acpi_device acpi0017_mock;
 static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = {
@@ -27,12 +36,6 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = {
        [1] = {
                .handle = &host_bridge[1],
        },
-       [2] = {
-               .handle = &host_bridge[2],
-       },
-       [3] = {
-               .handle = &host_bridge[3],
-       },
 };
 
 static bool is_mock_dev(struct device *dev)
@@ -70,7 +73,7 @@ static struct {
        } cfmws0;
        struct {
                struct acpi_cedt_cfmws cfmws;
-               u32 target[4];
+               u32 target[2];
        } cfmws1;
        struct {
                struct acpi_cedt_cfmws cfmws;
@@ -78,7 +81,7 @@ static struct {
        } cfmws2;
        struct {
                struct acpi_cedt_cfmws cfmws;
-               u32 target[4];
+               u32 target[2];
        } cfmws3;
 } __packed mock_cedt = {
        .cedt = {
@@ -104,22 +107,6 @@ static struct {
                .uid = 1,
                .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
        },
-       .chbs[2] = {
-               .header = {
-                       .type = ACPI_CEDT_TYPE_CHBS,
-                       .length = sizeof(mock_cedt.chbs[0]),
-               },
-               .uid = 2,
-               .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
-       },
-       .chbs[3] = {
-               .header = {
-                       .type = ACPI_CEDT_TYPE_CHBS,
-                       .length = sizeof(mock_cedt.chbs[0]),
-               },
-               .uid = 3,
-               .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
-       },
        .cfmws0 = {
                .cfmws = {
                        .header = {
@@ -141,14 +128,14 @@ static struct {
                                .type = ACPI_CEDT_TYPE_CFMWS,
                                .length = sizeof(mock_cedt.cfmws1),
                        },
-                       .interleave_ways = 2,
+                       .interleave_ways = 1,
                        .granularity = 4,
                        .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                        ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
                        .qtg_id = 1,
-                       .window_size = SZ_256M * 4,
+                       .window_size = SZ_256M * 2,
                },
-               .target = { 0, 1, 2, 3 },
+               .target = { 0, 1, },
        },
        .cfmws2 = {
                .cfmws = {
@@ -171,14 +158,14 @@ static struct {
                                .type = ACPI_CEDT_TYPE_CFMWS,
                                .length = sizeof(mock_cedt.cfmws3),
                        },
-                       .interleave_ways = 2,
+                       .interleave_ways = 1,
                        .granularity = 4,
                        .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                        ACPI_CEDT_CFMWS_RESTRICT_PMEM,
                        .qtg_id = 3,
-                       .window_size = SZ_256M * 4,
+                       .window_size = SZ_256M * 2,
                },
-               .target = { 0, 1, 2, 3 },
+               .target = { 0, 1, },
        },
 };
 
@@ -317,6 +304,30 @@ static bool is_mock_bridge(struct device *dev)
        for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++)
                if (dev == &cxl_host_bridge[i]->dev)
                        return true;
+       return false;
+}
+
+static bool is_mock_port(struct device *dev)
+{
+       int i;
+
+       if (is_mock_bridge(dev))
+               return true;
+
+       for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++)
+               if (dev == &cxl_root_port[i]->dev)
+                       return true;
+
+       for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++)
+               if (dev == &cxl_switch_uport[i]->dev)
+                       return true;
+
+       for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++)
+               if (dev == &cxl_switch_dport[i]->dev)
+                       return true;
+
+       if (is_cxl_memdev(dev))
+               return is_mock_dev(dev->parent);
 
        return false;
 }
@@ -358,34 +369,8 @@ static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = {
        [1] = {
                .bus = &mock_pci_bus[1],
        },
-       [2] = {
-               .bus = &mock_pci_bus[2],
-       },
-       [3] = {
-               .bus = &mock_pci_bus[3],
-       },
 };
 
-static struct platform_device *mock_cxl_root_port(struct pci_bus *bus, int index)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++)
-               if (bus == &mock_pci_bus[i])
-                       return cxl_root_port[index + i * NR_CXL_ROOT_PORTS];
-       return NULL;
-}
-
-static bool is_mock_port(struct platform_device *pdev)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++)
-               if (pdev == cxl_root_port[i])
-                       return true;
-       return false;
-}
-
 static bool is_mock_bus(struct pci_bus *bus)
 {
        int i;
@@ -405,16 +390,166 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle)
        return &mock_pci_root[host_bridge_index(adev)];
 }
 
+static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port)
+{
+       struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL);
+
+       if (!cxlhdm)
+               return ERR_PTR(-ENOMEM);
+
+       cxlhdm->port = port;
+       return cxlhdm;
+}
+
+static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
+{
+       dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n");
+       return -EOPNOTSUPP;
+}
+
+struct target_map_ctx {
+       int *target_map;
+       int index;
+       int target_count;
+};
+
+static int map_targets(struct device *dev, void *data)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct target_map_ctx *ctx = data;
+
+       ctx->target_map[ctx->index++] = pdev->id;
+
+       if (ctx->index > ctx->target_count) {
+               dev_WARN_ONCE(dev, 1, "too many targets found?\n");
+               return -ENXIO;
+       }
+
+       return 0;
+}
+
+static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
+{
+       struct cxl_port *port = cxlhdm->port;
+       struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+       int target_count, i;
+
+       if (is_cxl_endpoint(port))
+               target_count = 0;
+       else if (is_cxl_root(parent_port))
+               target_count = NR_CXL_ROOT_PORTS;
+       else
+               target_count = NR_CXL_SWITCH_PORTS;
+
+       for (i = 0; i < NR_CXL_PORT_DECODERS; i++) {
+               int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
+               struct target_map_ctx ctx = {
+                       .target_map = target_map,
+                       .target_count = target_count,
+               };
+               struct cxl_decoder *cxld;
+               int rc;
+
+               if (target_count)
+                       cxld = cxl_switch_decoder_alloc(port, target_count);
+               else
+                       cxld = cxl_endpoint_decoder_alloc(port);
+               if (IS_ERR(cxld)) {
+                       dev_warn(&port->dev,
+                                "Failed to allocate the decoder\n");
+                       return PTR_ERR(cxld);
+               }
+
+               cxld->decoder_range = (struct range) {
+                       .start = 0,
+                       .end = -1,
+               };
+
+               cxld->flags = CXL_DECODER_F_ENABLE;
+               cxld->interleave_ways = min_not_zero(target_count, 1);
+               cxld->interleave_granularity = SZ_4K;
+               cxld->target_type = CXL_DECODER_EXPANDER;
+
+               if (target_count) {
+                       rc = device_for_each_child(port->uport, &ctx,
+                                                  map_targets);
+                       if (rc) {
+                               put_device(&cxld->dev);
+                               return rc;
+                       }
+               }
+
+               rc = cxl_decoder_add_locked(cxld, target_map);
+               if (rc) {
+                       put_device(&cxld->dev);
+                       dev_err(&port->dev, "Failed to add decoder\n");
+                       return rc;
+               }
+
+               rc = cxl_decoder_autoremove(&port->dev, cxld);
+               if (rc)
+                       return rc;
+               dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+       }
+
+       return 0;
+}
+
+static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+{
+       struct device *dev = &port->dev;
+       struct platform_device **array;
+       int i, array_size;
+
+       if (port->depth == 1) {
+               array_size = ARRAY_SIZE(cxl_root_port);
+               array = cxl_root_port;
+       } else if (port->depth == 2) {
+               array_size = ARRAY_SIZE(cxl_switch_dport);
+               array = cxl_switch_dport;
+       } else {
+               dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n",
+                             port->depth);
+               return -ENXIO;
+       }
+
+       for (i = 0; i < array_size; i++) {
+               struct platform_device *pdev = array[i];
+               struct cxl_dport *dport;
+
+               if (pdev->dev.parent != port->uport)
+                       continue;
+
+               dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
+                                          CXL_RESOURCE_NONE);
+
+               if (IS_ERR(dport)) {
+                       dev_err(dev, "failed to add dport: %s (%ld)\n",
+                               dev_name(&pdev->dev), PTR_ERR(dport));
+                       return PTR_ERR(dport);
+               }
+
+               dev_dbg(dev, "add dport%d: %s\n", pdev->id,
+                       dev_name(&pdev->dev));
+       }
+
+       return 0;
+}
+
 static struct cxl_mock_ops cxl_mock_ops = {
        .is_mock_adev = is_mock_adev,
        .is_mock_bridge = is_mock_bridge,
        .is_mock_bus = is_mock_bus,
        .is_mock_port = is_mock_port,
        .is_mock_dev = is_mock_dev,
-       .mock_port = mock_cxl_root_port,
        .acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
        .acpi_evaluate_integer = mock_acpi_evaluate_integer,
        .acpi_pci_find_root = mock_acpi_pci_find_root,
+       .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
+       .devm_cxl_setup_hdm = mock_cxl_setup_hdm,
+       .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
+       .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
        .list = LIST_HEAD_INIT(cxl_mock_ops.list),
 };
 
@@ -506,12 +641,17 @@ static __init int cxl_test_init(void)
                        platform_device_put(pdev);
                        goto err_bridge;
                }
+
                cxl_host_bridge[i] = pdev;
+               rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
+                                      "physical_node");
+               if (rc)
+                       goto err_bridge;
        }
 
        for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) {
                struct platform_device *bridge =
-                       cxl_host_bridge[i / NR_CXL_ROOT_PORTS];
+                       cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)];
                struct platform_device *pdev;
 
                pdev = platform_device_alloc("cxl_root_port", i);
@@ -527,15 +667,52 @@ static __init int cxl_test_init(void)
                cxl_root_port[i] = pdev;
        }
 
-       BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_root_port));
+       BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port));
+       for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) {
+               struct platform_device *root_port = cxl_root_port[i];
+               struct platform_device *pdev;
+
+               pdev = platform_device_alloc("cxl_switch_uport", i);
+               if (!pdev)
+                       goto err_port;
+               pdev->dev.parent = &root_port->dev;
+
+               rc = platform_device_add(pdev);
+               if (rc) {
+                       platform_device_put(pdev);
+                       goto err_uport;
+               }
+               cxl_switch_uport[i] = pdev;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) {
+               struct platform_device *uport =
+                       cxl_switch_uport[i % ARRAY_SIZE(cxl_switch_uport)];
+               struct platform_device *pdev;
+
+               pdev = platform_device_alloc("cxl_switch_dport", i);
+               if (!pdev)
+                       goto err_port;
+               pdev->dev.parent = &uport->dev;
+
+               rc = platform_device_add(pdev);
+               if (rc) {
+                       platform_device_put(pdev);
+                       goto err_dport;
+               }
+               cxl_switch_dport[i] = pdev;
+       }
+
+       BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport));
        for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) {
-               struct platform_device *port = cxl_root_port[i];
+               struct platform_device *dport = cxl_switch_dport[i];
                struct platform_device *pdev;
 
                pdev = alloc_memdev(i);
                if (!pdev)
                        goto err_mem;
-               pdev->dev.parent = &port->dev;
+               pdev->dev.parent = &dport->dev;
+               set_dev_node(&pdev->dev, i % 2);
 
                rc = platform_device_add(pdev);
                if (rc) {
@@ -563,12 +740,24 @@ err_add:
 err_mem:
        for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--)
                platform_device_unregister(cxl_mem[i]);
+err_dport:
+       for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--)
+               platform_device_unregister(cxl_switch_dport[i]);
+err_uport:
+       for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--)
+               platform_device_unregister(cxl_switch_uport[i]);
 err_port:
        for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
                platform_device_unregister(cxl_root_port[i]);
 err_bridge:
-       for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--)
+       for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) {
+               struct platform_device *pdev = cxl_host_bridge[i];
+
+               if (!pdev)
+                       continue;
+               sysfs_remove_link(&pdev->dev.kobj, "physical_node");
                platform_device_unregister(cxl_host_bridge[i]);
+       }
 err_populate:
        depopulate_all_mock_resources();
 err_gen_pool_add:
@@ -585,10 +774,20 @@ static __exit void cxl_test_exit(void)
        platform_device_unregister(cxl_acpi);
        for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--)
                platform_device_unregister(cxl_mem[i]);
+       for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--)
+               platform_device_unregister(cxl_switch_dport[i]);
+       for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--)
+               platform_device_unregister(cxl_switch_uport[i]);
        for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
                platform_device_unregister(cxl_root_port[i]);
-       for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--)
+       for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) {
+               struct platform_device *pdev = cxl_host_bridge[i];
+
+               if (!pdev)
+                       continue;
+               sysfs_remove_link(&pdev->dev.kobj, "physical_node");
                platform_device_unregister(cxl_host_bridge[i]);
+       }
        depopulate_all_mock_resources();
        gen_pool_destroy(cxl_mock_pool);
        unregister_cxl_mock_ops(&cxl_mock_ops);
@@ -598,3 +797,4 @@ module_init(cxl_test_init);
 module_exit(cxl_test_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
+MODULE_IMPORT_NS(CXL);
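
The revised constants keep the memdev count at 8 (2 host bridges x 2 root
ports x 2 switch dports) while halving the host bridges and inserting one
switch level between root port and endpoint. Separately, parent assignment
moves from block distribution (i / N) to round-robin (i % N); a sketch of the
difference for 4 root ports over 2 host bridges (illustrative helpers, not
cxl_test code):

    /* mapping root port i to a host bridge index */
    static int blocked(int i)       { return i / 2; }   /* 0, 0, 1, 1 */
    static int round_robin(int i)   { return i % 2; }   /* 0, 1, 0, 1 */
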
index 8c2086c..b6b726e 100644
@@ -4,6 +4,7 @@
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/bits.h>
 #include <cxlmem.h>
@@ -236,11 +237,25 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
        return rc;
 }
 
+static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds)
+{
+       msleep(100);
+       return 0;
+}
+
 static void label_area_release(void *lsa)
 {
        vfree(lsa);
 }
 
+static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds)
+{
+       struct cxl_endpoint_dvsec_info *info;
+
+       info = &cxlds->info;
+       info->mem_enabled = true;
+}
+
 static int cxl_mock_mem_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -261,7 +276,9 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
        if (IS_ERR(cxlds))
                return PTR_ERR(cxlds);
 
+       cxlds->serial = pdev->id;
        cxlds->mbox_send = cxl_mock_mbox_send;
+       cxlds->wait_media_ready = cxl_mock_wait_media_ready;
        cxlds->payload_size = SZ_4K;
 
        rc = cxl_enumerate_cmds(cxlds);
@@ -276,6 +293,8 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
        if (rc)
                return rc;
 
+       mock_validate_dvsec_ranges(cxlds);
+
        cxlmd = devm_cxl_add_memdev(cxlds);
        if (IS_ERR(cxlmd))
                return PTR_ERR(cxlmd);
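
The mock's wait_media_ready() simply sleeps and reports success, standing in
for the cxl_pci callback that polls for media readiness with a timeout. A
hedged sketch of such a polling loop (my_media_is_ready() is a hypothetical
helper, not the cxl_pci implementation):

    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/jiffies.h>

    struct cxl_dev_state;
    bool my_media_is_ready(struct cxl_dev_state *cxlds); /* hypothetical */

    static int my_wait_media_ready(struct cxl_dev_state *cxlds)
    {
            unsigned long timeout = jiffies + 60 * HZ;

            for (;;) {
                    if (my_media_is_ready(cxlds))
                            return 0;
                    if (time_after(jiffies, timeout))
                            return -ETIMEDOUT;
                    msleep(100);
            }
    }
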
index 17408f8..6e8c9d6 100644
@@ -7,6 +7,8 @@
 #include <linux/export.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <cxlmem.h>
+#include <cxlpci.h>
 #include "mock.h"
 
 static LIST_HEAD(mock);
@@ -114,32 +116,6 @@ struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle)
 }
 EXPORT_SYMBOL_GPL(__wrap_acpi_pci_find_root);
 
-void __wrap_pci_walk_bus(struct pci_bus *bus,
-                        int (*cb)(struct pci_dev *, void *), void *userdata)
-{
-       int index;
-       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-       if (ops && ops->is_mock_bus(bus)) {
-               int rc, i;
-
-               /*
-                * Simulate 2 root ports per host-bridge and no
-                * depth recursion.
-                */
-               for (i = 0; i < 2; i++) {
-                       rc = cb((struct pci_dev *) ops->mock_port(bus, i),
-                               userdata);
-                       if (rc)
-                               break;
-               }
-       } else
-               pci_walk_bus(bus, cb, userdata);
-
-       put_cxl_mock_ops(index);
-}
-EXPORT_SYMBOL_GPL(__wrap_pci_walk_bus);
-
 struct nvdimm_bus *
 __wrap_nvdimm_bus_register(struct device *dev,
                           struct nvdimm_bus_descriptor *nd_desc)
@@ -155,5 +131,68 @@ __wrap_nvdimm_bus_register(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
 
+struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port)
+{
+       int index;
+       struct cxl_hdm *cxlhdm;
+       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+       if (ops && ops->is_mock_port(port->uport))
+               cxlhdm = ops->devm_cxl_setup_hdm(port);
+       else
+               cxlhdm = devm_cxl_setup_hdm(port);
+       put_cxl_mock_ops(index);
+
+       return cxlhdm;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL);
+
+int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+{
+       int rc, index;
+       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+       if (ops && ops->is_mock_port(port->uport))
+               rc = ops->devm_cxl_add_passthrough_decoder(port);
+       else
+               rc = devm_cxl_add_passthrough_decoder(port);
+       put_cxl_mock_ops(index);
+
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL);
+
+int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
+{
+       int rc, index;
+       struct cxl_port *port = cxlhdm->port;
+       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+       if (ops && ops->is_mock_port(port->uport))
+               rc = ops->devm_cxl_enumerate_decoders(cxlhdm);
+       else
+               rc = devm_cxl_enumerate_decoders(cxlhdm);
+       put_cxl_mock_ops(index);
+
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL);
+
+int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
+{
+       int rc, index;
+       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+       if (ops && ops->is_mock_port(port->uport))
+               rc = ops->devm_cxl_port_enumerate_dports(port);
+       else
+               rc = devm_cxl_port_enumerate_dports(port);
+       put_cxl_mock_ops(index);
+
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL);
+
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
+MODULE_IMPORT_NS(CXL);
index 15ed0fd..738f24e 100644
@@ -2,6 +2,7 @@
 
 #include <linux/list.h>
 #include <linux/acpi.h>
+#include <cxl.h>
 
 struct cxl_mock_ops {
        struct list_head list;
@@ -15,10 +16,13 @@ struct cxl_mock_ops {
                                             struct acpi_object_list *arguments,
                                             unsigned long long *data);
        struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle);
-       struct platform_device *(*mock_port)(struct pci_bus *bus, int index);
        bool (*is_mock_bus)(struct pci_bus *bus);
-       bool (*is_mock_port)(struct platform_device *pdev);
+       bool (*is_mock_port)(struct device *dev);
        bool (*is_mock_dev)(struct device *dev);
+       int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
+       struct cxl_hdm *(*devm_cxl_setup_hdm)(struct cxl_port *port);
+       int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
+       int (*devm_cxl_enumerate_decoders)(struct cxl_hdm *hdm);
 };
 
 void register_cxl_mock_ops(struct cxl_mock_ops *ops);
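
A test module registers an ops table once at init and removes it at exit,
exactly as cxl_test does with cxl_mock_ops above; the __wrap_*() thunks then
consult whichever table is registered. A minimal sketch (a real table, like
cxl_test's, also fills in the ACPI and devm_cxl_* hooks):

    #include <linux/module.h>
    #include "test/mock.h"

    static bool my_is_mock_port(struct device *dev)
    {
            return false;   /* this sketch never claims a device */
    }

    static struct cxl_mock_ops my_ops = {
            .list = LIST_HEAD_INIT(my_ops.list),
            .is_mock_port = my_is_mock_port,
    };

    static int __init my_test_init(void)
    {
            register_cxl_mock_ops(&my_ops);
            return 0;
    }
    module_init(my_test_init);

    static void __exit my_test_exit(void)
    {
            unregister_cxl_mock_ops(&my_ops);
    }
    module_exit(my_test_exit);

    MODULE_LICENSE("GPL v2");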