Merge tag 'cxl-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 30 Apr 2023 18:51:51 +0000 (11:51 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 30 Apr 2023 18:51:51 +0000 (11:51 -0700)
Pull compute express link updates from Dan Williams:
 "DOE support is promoted from drivers/cxl/ to drivers/pci/ with Bjorn's
  blessing, and the CXL core continues to mature its media management
  capabilities with support for listing and injecting media errors. Some
  late fixes that missed v6.3-final are also included:

   - Refactor the DOE infrastructure (Data Object Exchange
     PCI-config-cycle mailbox) to be a facility of the PCI core rather
     than the CXL core.

     This is foundational for upcoming support for PCI
     device-attestation and PCIe / CXL link encryption.

   - Add support for retrieving and injecting poison for CXL memory
     expanders.

     This enabling uses trace-events to convey CXL media error records
     to user tooling. It includes translation of device-local addresses
     (DPA) to system physical addresses (SPA) and their corresponding
     CXL region.

   - Fixes for decoder enumeration that missed v6.3-final

   - Miscellaneous fixups"

* tag 'cxl-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (38 commits)
  cxl/test: Add mock test for set_timestamp
  cxl/mbox: Update CMD_RC_TABLE
  tools/testing/cxl: Require CONFIG_DEBUG_FS
  tools/testing/cxl: Add a sysfs attr to test poison inject limits
  tools/testing/cxl: Use injected poison for get poison list
  tools/testing/cxl: Mock the Clear Poison mailbox command
  tools/testing/cxl: Mock the Inject Poison mailbox command
  cxl/mem: Add debugfs attributes for poison inject and clear
  cxl/memdev: Trace inject and clear poison as cxl_poison events
  cxl/memdev: Warn of poison inject or clear to a mapped region
  cxl/memdev: Add support for the Clear Poison mailbox command
  cxl/memdev: Add support for the Inject Poison mailbox command
  tools/testing/cxl: Mock support for Get Poison List
  cxl/trace: Add an HPA to cxl_poison trace events
  cxl/region: Provide region info to the cxl_poison trace event
  cxl/memdev: Add trigger_poison_list sysfs attribute
  cxl/trace: Add TRACE support for CXL media-error records
  cxl/mbox: Add GET_POISON_LIST mailbox command
  cxl/mbox: Initialize the poison state
  cxl/mbox: Restrict poison cmds to debugfs cxl_raw_allow_all
  ...

25 files changed:
.clang-format
Documentation/ABI/testing/debugfs-cxl [new file with mode: 0644]
Documentation/ABI/testing/sysfs-bus-cxl
drivers/cxl/core/core.h
drivers/cxl/core/hdm.c
drivers/cxl/core/mbox.c
drivers/cxl/core/memdev.c
drivers/cxl/core/pci.c
drivers/cxl/core/port.c
drivers/cxl/core/region.c
drivers/cxl/core/trace.c
drivers/cxl/core/trace.h
drivers/cxl/cxlmem.h
drivers/cxl/mem.c
drivers/cxl/pci.c
drivers/cxl/port.c
drivers/pci/doe.c
drivers/pci/pci.h
drivers/pci/probe.c
drivers/pci/remove.c
include/linux/pci-doe.h
include/linux/pci.h
include/uapi/linux/cxl_mem.h
tools/testing/cxl/config_check.c
tools/testing/cxl/test/mem.c

index 2048b02..0d1ed87 100644 (file)
@@ -521,7 +521,6 @@ ForEachMacros:
   - 'of_property_for_each_u32'
   - 'pci_bus_for_each_resource'
   - 'pci_dev_for_each_resource'
-  - 'pci_doe_for_each_off'
   - 'pcl_for_each_chunk'
   - 'pcl_for_each_segment'
   - 'pcm_for_each_format'
diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
new file mode 100644 (file)
index 0000000..fe61d37
--- /dev/null
@@ -0,0 +1,35 @@
+What:          /sys/kernel/debug/cxl/memX/inject_poison
+Date:          April, 2023
+KernelVersion: v6.4
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (WO) When a Device Physical Address (DPA) is written to this
+               attribute, the memdev driver sends an inject poison command to
+               the device for the specified address. The DPA must be 64-byte
+               aligned and the length of the injected poison is 64 bytes. If
+               successful, the device returns poison when the address is
+               accessed through the CXL.mem bus. Injecting poison adds the
+               address to the device's Poison List and the error source is set
+               to Injected. In addition, the device adds a poison creation
+               event to its internal Informational Event log, updates the
+               Event Status register, and if configured, interrupts the host.
+               It is not an error to inject poison into an address that
+               already has poison present and no error is returned. The
+               inject_poison attribute is only visible for devices supporting
+               the capability.
+
+
+What:          /sys/kernel/debug/cxl/memX/clear_poison
+Date:          April, 2023
+KernelVersion: v6.4
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (WO) When a Device Physical Address (DPA) is written to this
+               attribute, the memdev driver sends a clear poison command to
+               the device for the specified address. Clearing poison removes
+               the address from the device's Poison List and writes 0 (zero)
+               for 64 bytes starting at address. It is not an error to clear
+               poison from an address that does not have poison set. If the
+               device cannot clear poison from the address, -ENXIO is returned.
+               The clear_poison attribute is only visible for devices
+               supporting the capability.
index 3acf2f1..48ac0d9 100644 (file)
@@ -415,3 +415,17 @@ Description:
                1), and checks that the hardware accepts the commit request.
                Reading this value indicates whether the region is committed or
                not.
+
+
+What:          /sys/bus/cxl/devices/memX/trigger_poison_list
+Date:          April, 2023
+KernelVersion: v6.4
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (WO) When a boolean 'true' is written to this attribute the
+               memdev driver retrieves the poison list from the device. The
+               list consists of addresses that are poisoned, or would result
+               in poison if accessed, and the source of the poison. This
+               attribute is only visible for devices supporting the
+               capability. The retrieved errors are logged as kernel
+               events when cxl_poison event tracing is enabled.
index cde475e..27f0968 100644 (file)
@@ -25,7 +25,12 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
 #define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
 int cxl_region_init(void);
 void cxl_region_exit(void);
+int cxl_get_poison_by_endpoint(struct cxl_port *port);
 #else
+static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
+{
+       return 0;
+}
 static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
 {
 }
@@ -64,4 +69,10 @@ int cxl_memdev_init(void);
 void cxl_memdev_exit(void);
 void cxl_mbox_init(void);
 
+enum cxl_poison_trace_type {
+       CXL_POISON_TRACE_LIST,          /* record returned by Get Poison List */
+       CXL_POISON_TRACE_INJECT,        /* traced after an Inject Poison command */
+       CXL_POISON_TRACE_CLEAR,         /* traced after a Clear Poison command */
+};
+
 #endif /* __CXL_CORE_H__ */
index 02cc2c3..7889ff2 100644 (file)
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/seq_file.h>
 #include <linux/device.h>
 #include <linux/delay.h>
@@ -93,8 +92,9 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
 
        cxl_probe_component_regs(&port->dev, crb, &map.component_map);
        if (!map.component_map.hdm_decoder.valid) {
-               dev_err(&port->dev, "HDM decoder registers invalid\n");
-               return -ENXIO;
+               dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
+               /* unique error code to indicate no HDM decoder capability */
+               return -ENODEV;
        }
 
        return cxl_map_component_regs(&port->dev, regs, &map,
@@ -130,6 +130,14 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
         */
        for (i = 0; i < cxlhdm->decoder_count; i++) {
                ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+               dev_dbg(&info->port->dev,
+                       "decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n",
+                       info->port->id, i,
+                       FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl),
+                       readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)),
+                       readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)),
+                       readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)),
+                       readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i)));
                if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
                        return false;
        }
@@ -269,8 +277,11 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 
        lockdep_assert_held_write(&cxl_dpa_rwsem);
 
-       if (!len)
-               goto success;
+       if (!len) {
+               dev_warn(dev, "decoder%d.%d: empty reservation attempted\n",
+                        port->id, cxled->cxld.id);
+               return -EINVAL;
+       }
 
        if (cxled->dpa_res) {
                dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n",
@@ -323,7 +334,6 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
                cxled->mode = CXL_DECODER_MIXED;
        }
 
-success:
        port->hdm_end++;
        get_device(&cxled->cxld.dev);
        return 0;
@@ -783,8 +793,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
                            int *target_map, void __iomem *hdm, int which,
                            u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
 {
+       u64 size, base, skip, dpa_size, lo, hi;
        struct cxl_endpoint_decoder *cxled;
-       u64 size, base, skip, dpa_size;
        bool committed;
        u32 remainder;
        int i, rc;
@@ -799,8 +809,12 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
                                                        which, info);
 
        ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
-       base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
-       size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
+       lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
+       hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which));
+       base = (hi << 32) + lo;
+       lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
+       hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
+       size = (hi << 32) + lo;
        committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
        cxld->commit = cxl_decoder_commit;
        cxld->reset = cxl_decoder_reset;
@@ -833,6 +847,13 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
                                 port->id, cxld->id);
                        return -ENXIO;
                }
+
+               if (size == 0) {
+                       dev_warn(&port->dev,
+                                "decoder%d.%d: Committed with zero size\n",
+                                port->id, cxld->id);
+                       return -ENXIO;
+               }
                port->commit_end = cxld->id;
        } else {
                /* unless / until type-2 drivers arrive, assume type-3 */
@@ -855,9 +876,14 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
        if (rc)
                return rc;
 
+       dev_dbg(&port->dev, "decoder%d.%d: range: %#llx-%#llx iw: %d ig: %d\n",
+               port->id, cxld->id, cxld->hpa_range.start, cxld->hpa_range.end,
+               cxld->interleave_ways, cxld->interleave_granularity);
+
        if (!info) {
-               target_list.value =
-                       ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
+               lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
+               hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
+               target_list.value = (hi << 32) + lo;
                for (i = 0; i < cxld->interleave_ways; i++)
                        target_map[i] = target_list.target_id[i];
 
@@ -874,7 +900,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
                        port->id, cxld->id, size, cxld->interleave_ways);
                return -ENXIO;
        }
-       skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+       lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+       hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
+       skip = (hi << 32) + lo;
        cxled = to_cxl_endpoint_decoder(&cxld->dev);
        rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
        if (rc) {
index f2addb4..23b9ff9 100644 (file)
@@ -1,10 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/security.h>
 #include <linux/debugfs.h>
 #include <linux/ktime.h>
 #include <linux/mutex.h>
+#include <asm/unaligned.h>
+#include <cxlpci.h>
 #include <cxlmem.h>
 #include <cxl.h>
 
@@ -61,12 +62,7 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
        CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
        CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
        CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
-       CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0),
-       CXL_CMD(INJECT_POISON, 0x8, 0, 0),
-       CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
        CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
-       CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
-       CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0),
 };
 
 /*
@@ -87,6 +83,9 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
  *
  * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
  * is kept up to date with patrol notifications and error management.
+ *
+ * CXL_MBOX_OP_[GET_,INJECT_,CLEAR_]POISON: These commands require kernel
+ * driver orchestration for safety.
  */
 static u16 cxl_disabled_raw_commands[] = {
        CXL_MBOX_OP_ACTIVATE_FW,
@@ -95,6 +94,9 @@ static u16 cxl_disabled_raw_commands[] = {
        CXL_MBOX_OP_SET_SHUTDOWN_STATE,
        CXL_MBOX_OP_SCAN_MEDIA,
        CXL_MBOX_OP_GET_SCAN_MEDIA,
+       CXL_MBOX_OP_GET_POISON,
+       CXL_MBOX_OP_INJECT_POISON,
+       CXL_MBOX_OP_CLEAR_POISON,
 };
 
 /*
@@ -119,6 +121,43 @@ static bool cxl_is_security_command(u16 opcode)
        return false;
 }
 
+/*
+ * Report whether @opcode belongs to the poison command set, i.e. whether
+ * its upper byte equals CXL_MBOX_OP_POISON_CMDS (0x43).
+ */
+static bool cxl_is_poison_command(u16 opcode)
+{
+#define CXL_MBOX_OP_POISON_CMDS 0x43
+       u16 cmd_set = opcode >> 8;
+
+       return cmd_set == CXL_MBOX_OP_POISON_CMDS;
+}
+
+/*
+ * Mark the poison management command identified by @opcode as enabled in
+ * @poison->enabled_cmds. Opcodes other than the six handled below are
+ * silently ignored.
+ */
+static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
+                                      u16 opcode)
+{
+       int bit;
+
+       /* Translate the mailbox opcode into its enabled_cmds bit number */
+       if (opcode == CXL_MBOX_OP_GET_POISON)
+               bit = CXL_POISON_ENABLED_LIST;
+       else if (opcode == CXL_MBOX_OP_INJECT_POISON)
+               bit = CXL_POISON_ENABLED_INJECT;
+       else if (opcode == CXL_MBOX_OP_CLEAR_POISON)
+               bit = CXL_POISON_ENABLED_CLEAR;
+       else if (opcode == CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS)
+               bit = CXL_POISON_ENABLED_SCAN_CAPS;
+       else if (opcode == CXL_MBOX_OP_SCAN_MEDIA)
+               bit = CXL_POISON_ENABLED_SCAN_MEDIA;
+       else if (opcode == CXL_MBOX_OP_GET_SCAN_MEDIA)
+               bit = CXL_POISON_ENABLED_SCAN_RESULTS;
+       else
+               return;
+
+       set_bit(bit, poison->enabled_cmds);
+}
+
 static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
 {
        struct cxl_mem_command *c;
@@ -634,13 +673,18 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
                u16 opcode = le16_to_cpu(cel_entry[i].opcode);
                struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
 
-               if (!cmd) {
+               if (!cmd && !cxl_is_poison_command(opcode)) {
                        dev_dbg(cxlds->dev,
                                "Opcode 0x%04x unsupported by driver\n", opcode);
                        continue;
                }
 
-               set_bit(cmd->info.id, cxlds->enabled_cmds);
+               if (cmd)
+                       set_bit(cmd->info.id, cxlds->enabled_cmds);
+
+               if (cxl_is_poison_command(opcode))
+                       cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);
+
                dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
        }
 }
@@ -994,6 +1038,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
        /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
        struct cxl_mbox_identify id;
        struct cxl_mbox_cmd mbox_cmd;
+       u32 val;
        int rc;
 
        mbox_cmd = (struct cxl_mbox_cmd) {
@@ -1017,6 +1062,11 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
        cxlds->lsa_size = le32_to_cpu(id.lsa_size);
        memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
 
+       if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) {
+               val = get_unaligned_le24(id.poison_list_max_mer);
+               cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX);
+       }
+
        return 0;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
@@ -1107,6 +1157,91 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
 
+/*
+ * Issue Get Poison List for the range described by @offset and @len and
+ * emit one cxl_poison trace event (type CXL_POISON_TRACE_LIST) for every
+ * record the device returns. @len is divided by CXL_POISON_LEN_MULT before
+ * it is placed in the command payload. @cxlr is passed through to the trace
+ * event unchanged.
+ *
+ * The command is re-sent while the device keeps CXL_POISON_FLAG_MORE set,
+ * up to poison.max_errors records in total. The shared output buffer is
+ * used under cxlds->poison.lock.
+ *
+ * Returns 0 on success, or the error from taking the lock or from the
+ * mailbox command.
+ */
+int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
+                      struct cxl_region *cxlr)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_mbox_poison_out *po;
+       struct cxl_mbox_poison_in pi;
+       struct cxl_mbox_cmd mbox_cmd;
+       int total = 0;
+       int rc;
+
+       rc = mutex_lock_interruptible(&cxlds->poison.lock);
+       if (rc)
+               return rc;
+
+       po = cxlds->poison.list_out;
+       pi.offset = cpu_to_le64(offset);
+       pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
+
+       mbox_cmd = (struct cxl_mbox_cmd) {
+               .opcode = CXL_MBOX_OP_GET_POISON,
+               .size_in = sizeof(pi),
+               .payload_in = &pi,
+               .size_out = cxlds->payload_size,
+               .payload_out = po,
+               .min_out = struct_size(po, record, 0),
+       };
+
+       for (;;) {
+               int count, i;
+
+               rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+               if (rc)
+                       break;
+
+               count = le16_to_cpu(po->count);
+               for (i = 0; i < count; i++)
+                       trace_cxl_poison(cxlmd, cxlr, &po->record[i],
+                                        po->flags, po->overflow_ts,
+                                        CXL_POISON_TRACE_LIST);
+
+               /* Protect against an uncleared _FLAG_MORE */
+               total += count;
+               if (total >= cxlds->poison.max_errors) {
+                       dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n",
+                               total);
+                       break;
+               }
+
+               /* The device signals further records via _FLAG_MORE */
+               if (!(po->flags & CXL_POISON_FLAG_MORE))
+                       break;
+       }
+
+       mutex_unlock(&cxlds->poison.lock);
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL);
+
+/* devm action callback: release the Get Poison List output buffer */
+static void free_poison_buf(void *buf)
+{
+       kvfree(buf);
+}
+
+/*
+ * Allocate the Get Poison List output buffer (cxlds->payload_size bytes)
+ * and register free_poison_buf() as a devm action on cxlds->dev to free it.
+ * The buffer is protected by cxlds->poison.lock.
+ *
+ * Returns -ENOMEM if the allocation fails, otherwise the result of
+ * devm_add_action_or_reset().
+ */
+static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds)
+{
+       void *buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+
+       cxlds->poison.list_out = buf;
+       if (!buf)
+               return -ENOMEM;
+
+       return devm_add_action_or_reset(cxlds->dev, free_poison_buf, buf);
+}
+
+/*
+ * Set up poison list state for @cxlds. Nothing is done unless the
+ * CXL_POISON_ENABLED_LIST bit is set. Otherwise the output buffer is
+ * allocated and poison.lock is initialized; if the allocation fails the
+ * CXL_POISON_ENABLED_LIST bit is cleared and the error is returned.
+ */
+int cxl_poison_state_init(struct cxl_dev_state *cxlds)
+{
+       int rc = 0;
+
+       if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) {
+               rc = cxl_poison_alloc_buf(cxlds);
+               if (rc)
+                       clear_bit(CXL_POISON_ENABLED_LIST,
+                                 cxlds->poison.enabled_cmds);
+               else
+                       mutex_init(&cxlds->poison.lock);
+       }
+
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
+
 struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
 {
        struct cxl_dev_state *cxlds;
index 28a05f2..057a432 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/idr.h>
 #include <linux/pci.h>
 #include <cxlmem.h>
+#include "trace.h"
 #include "core.h"
 
 static DECLARE_RWSEM(cxl_memdev_rwsem);
@@ -106,6 +107,232 @@ static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(numa_node);
 
+/*
+ * Read the poison list for the whole memdev, one request for the pmem
+ * resource and one for the ram resource; an empty resource is skipped.
+ * A pmem failure is returned immediately. For ram, -EFAULT is treated as
+ * success.
+ */
+static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct resource *pmem = &cxlds->pmem_res;
+       struct resource *ram = &cxlds->ram_res;
+       int rc;
+
+       /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
+       if (resource_size(pmem)) {
+               rc = cxl_mem_get_poison(cxlmd, pmem->start,
+                                       resource_size(pmem), NULL);
+               if (rc)
+                       return rc;
+       }
+
+       if (!resource_size(ram))
+               return 0;
+
+       rc = cxl_mem_get_poison(cxlmd, ram->start, resource_size(ram), NULL);
+
+       /*
+        * Invalid Physical Address is not an error for
+        * volatile addresses. Device support is optional.
+        */
+       return rc == -EFAULT ? 0 : rc;
+}
+
+/*
+ * Collect the poison list for @cxlmd while holding cxl_dpa_rwsem for read.
+ * The list is gathered per memdev when the endpoint port has
+ * commit_end == -1, and per endpoint otherwise.
+ *
+ * Returns -EINVAL when the memdev's drvdata is not an endpoint port, the
+ * error from an interrupted lock acquisition, or the result of the poison
+ * collection.
+ */
+int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
+{
+       struct cxl_port *port = dev_get_drvdata(&cxlmd->dev);
+       int rc;
+
+       if (!port || !is_cxl_endpoint(port))
+               return -EINVAL;
+
+       rc = down_read_interruptible(&cxl_dpa_rwsem);
+       if (rc)
+               return rc;
+
+       if (port->commit_end != -1) {
+               /* Regions mapped, collect poison by endpoint */
+               rc = cxl_get_poison_by_endpoint(port);
+       } else {
+               /* No regions mapped to this memdev */
+               rc = cxl_get_poison_by_memdev(cxlmd);
+       }
+
+       up_read(&cxl_dpa_rwsem);
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
+
+struct cxl_dpa_to_region_context {
+       struct cxl_region *cxlr;        /* out: set by __cxl_dpa_to_region() on a match */
+       u64 dpa;                        /* in: address compared against each decoder's dpa_res */
+};
+
+/*
+ * device_for_each_child() callback used by cxl_dpa_to_region().
+ *
+ * @dev: child device of the endpoint port being walked
+ * @arg: struct cxl_dpa_to_region_context carrying the DPA to look up
+ *
+ * Returns 0 to continue the walk when @dev is not an endpoint decoder, has
+ * no (or an empty) DPA resource, or that resource does not contain the DPA.
+ * Returns 1 once a decoder whose DPA resource contains the address is
+ * found, after storing that decoder's region pointer in the context.
+ */
+static int __cxl_dpa_to_region(struct device *dev, void *arg)
+{
+       struct cxl_dpa_to_region_context *ctx = arg;
+       struct cxl_endpoint_decoder *cxled;
+       struct cxl_region *cxlr;
+       u64 dpa = ctx->dpa;
+
+       if (!is_endpoint_decoder(dev))
+               return 0;
+
+       cxled = to_cxl_endpoint_decoder(dev);
+       if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+               return 0;
+
+       if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
+               return 0;
+
+       /*
+        * The decoder's region pointer may be NULL (e.g. a DPA reservation
+        * exists but the decoder is not attached to a region), so only
+        * dereference it for the debug message when it is set. Callers
+        * already cope with a NULL region.
+        */
+       cxlr = cxled->cxld.region;
+       if (cxlr)
+               dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
+                       dev_name(&cxlr->dev));
+       else
+               dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
+                       dev_name(dev));
+
+       ctx->cxlr = cxlr;
+
+       return 1;
+}
+
+/*
+ * Look up the region recorded for the endpoint decoder whose DPA resource
+ * contains @dpa. Returns NULL when @cxlmd's drvdata is not an endpoint
+ * port, when that port has commit_end == -1, or when no decoder matches.
+ */
+static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
+{
+       struct cxl_dpa_to_region_context ctx = { .dpa = dpa };
+       struct cxl_port *port = dev_get_drvdata(&cxlmd->dev);
+
+       if (!port || !is_cxl_endpoint(port) || port->commit_end == -1)
+               return NULL;
+
+       device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
+
+       return ctx.cxlr;
+}
+
+/*
+ * Check that @dpa is usable as the target of an inject/clear poison
+ * command: the device must have a non-empty DPA resource, @dpa must lie
+ * within it, and @dpa must be 64-byte aligned.
+ *
+ * Returns 0 when the address is acceptable or when CONFIG_DEBUG_FS is
+ * disabled, -EINVAL otherwise.
+ */
+static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct resource *res = &cxlds->dpa_res;
+
+       if (!IS_ENABLED(CONFIG_DEBUG_FS))
+               return 0;
+
+       if (resource_size(res) == 0) {
+               dev_dbg(cxlds->dev, "device has no dpa resource\n");
+               return -EINVAL;
+       }
+
+       if (dpa > res->end || dpa < res->start) {
+               dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
+                       dpa, res);
+               return -EINVAL;
+       }
+
+       if (!IS_ALIGNED(dpa, 64)) {
+               dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Send an Inject Poison mailbox command for @dpa and, on success, emit a
+ * CXL_POISON_TRACE_INJECT cxl_poison trace event for it. A one-time
+ * warning is logged when @dpa maps to a region. The operation runs with
+ * cxl_dpa_rwsem held for read.
+ *
+ * Returns 0 on success or when CONFIG_DEBUG_FS is disabled; otherwise the
+ * error from the lock acquisition, the DPA validation, or the mailbox
+ * command.
+ */
+int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_mbox_inject_poison inject;
+       struct cxl_mbox_cmd mbox_cmd = {
+               .opcode = CXL_MBOX_OP_INJECT_POISON,
+               .size_in = sizeof(inject),
+               .payload_in = &inject,
+       };
+       struct cxl_poison_record record = {
+               .address = cpu_to_le64(dpa),
+               .length = cpu_to_le32(1),
+       };
+       struct cxl_region *cxlr;
+       int rc;
+
+       if (!IS_ENABLED(CONFIG_DEBUG_FS))
+               return 0;
+
+       rc = down_read_interruptible(&cxl_dpa_rwsem);
+       if (rc)
+               return rc;
+
+       rc = cxl_validate_poison_dpa(cxlmd, dpa);
+       if (rc)
+               goto unlock;
+
+       inject.address = cpu_to_le64(dpa);
+       rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+       if (rc)
+               goto unlock;
+
+       /* Injecting into an address that backs a region deserves a warning */
+       cxlr = cxl_dpa_to_region(cxlmd, dpa);
+       if (cxlr)
+               dev_warn_once(cxlds->dev,
+                             "poison inject dpa:%#llx region: %s\n", dpa,
+                             dev_name(&cxlr->dev));
+
+       trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
+
+unlock:
+       up_read(&cxl_dpa_rwsem);
+
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
+
+/*
+ * Send a Clear Poison mailbox command for @dpa and, on success, emit a
+ * CXL_POISON_TRACE_CLEAR cxl_poison trace event for it. A one-time
+ * warning is logged when @dpa maps to a region. The operation runs with
+ * cxl_dpa_rwsem held for read.
+ *
+ * Returns 0 on success or when CONFIG_DEBUG_FS is disabled; otherwise the
+ * error from the lock acquisition, the DPA validation, or the mailbox
+ * command.
+ */
+int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       /*
+        * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
+        * is defined to accept 64 bytes of write-data, along with the
+        * address to clear. This driver uses zeroes as write-data.
+        */
+       struct cxl_mbox_clear_poison clear = {
+               .address = cpu_to_le64(dpa)
+       };
+       struct cxl_mbox_cmd mbox_cmd = {
+               .opcode = CXL_MBOX_OP_CLEAR_POISON,
+               .size_in = sizeof(clear),
+               .payload_in = &clear,
+       };
+       struct cxl_poison_record record = {
+               .address = cpu_to_le64(dpa),
+               .length = cpu_to_le32(1),
+       };
+       struct cxl_region *cxlr;
+       int rc;
+
+       if (!IS_ENABLED(CONFIG_DEBUG_FS))
+               return 0;
+
+       rc = down_read_interruptible(&cxl_dpa_rwsem);
+       if (rc)
+               return rc;
+
+       rc = cxl_validate_poison_dpa(cxlmd, dpa);
+       if (rc)
+               goto unlock;
+
+       rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+       if (rc)
+               goto unlock;
+
+       /* Clearing an address that backs a region deserves a warning */
+       cxlr = cxl_dpa_to_region(cxlmd, dpa);
+       if (cxlr)
+               dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
+                             dpa, dev_name(&cxlr->dev));
+
+       trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
+
+unlock:
+       up_read(&cxl_dpa_rwsem);
+
+       return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);
+
 static struct attribute *cxl_memdev_attributes[] = {
        &dev_attr_serial.attr,
        &dev_attr_firmware_version.attr,
index 523d5b9..bdbd907 100644 (file)
@@ -441,27 +441,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
 #define CXL_DOE_TABLE_ACCESS_LAST_ENTRY                0xffff
 #define CXL_DOE_PROTOCOL_TABLE_ACCESS 2
 
-static struct pci_doe_mb *find_cdat_doe(struct device *uport)
-{
-       struct cxl_memdev *cxlmd;
-       struct cxl_dev_state *cxlds;
-       unsigned long index;
-       void *entry;
-
-       cxlmd = to_cxl_memdev(uport);
-       cxlds = cxlmd->cxlds;
-
-       xa_for_each(&cxlds->doe_mbs, index, entry) {
-               struct pci_doe_mb *cur = entry;
-
-               if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
-                                         CXL_DOE_PROTOCOL_TABLE_ACCESS))
-                       return cur;
-       }
-
-       return NULL;
-}
-
 #define CDAT_DOE_REQ(entry_handle) cpu_to_le32                         \
        (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,                      \
                    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |               \
@@ -469,51 +448,26 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport)
                    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |            \
         FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
 
-static void cxl_doe_task_complete(struct pci_doe_task *task)
-{
-       complete(task->private);
-}
-
-struct cdat_doe_task {
-       __le32 request_pl;
-       __le32 response_pl[32];
-       struct completion c;
-       struct pci_doe_task task;
-};
-
-#define DECLARE_CDAT_DOE_TASK(req, cdt)                       \
-struct cdat_doe_task cdt = {                                  \
-       .c = COMPLETION_INITIALIZER_ONSTACK(cdt.c),           \
-       .request_pl = req,                                    \
-       .task = {                                             \
-               .prot.vid = PCI_DVSEC_VENDOR_ID_CXL,        \
-               .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \
-               .request_pl = &cdt.request_pl,                \
-               .request_pl_sz = sizeof(cdt.request_pl),      \
-               .response_pl = cdt.response_pl,               \
-               .response_pl_sz = sizeof(cdt.response_pl),    \
-               .complete = cxl_doe_task_complete,            \
-               .private = &cdt.c,                            \
-       }                                                     \
-}
-
 static int cxl_cdat_get_length(struct device *dev,
                               struct pci_doe_mb *cdat_doe,
                               size_t *length)
 {
-       DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
+       __le32 request = CDAT_DOE_REQ(0);
+       __le32 response[2];
        int rc;
 
-       rc = pci_doe_submit_task(cdat_doe, &t.task);
+       rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL,
+                    CXL_DOE_PROTOCOL_TABLE_ACCESS,
+                    &request, sizeof(request),
+                    &response, sizeof(response));
        if (rc < 0) {
-               dev_err(dev, "DOE submit failed: %d", rc);
+               dev_err(dev, "DOE failed: %d", rc);
                return rc;
        }
-       wait_for_completion(&t.c);
-       if (t.task.rv < 2 * sizeof(__le32))
+       if (rc < sizeof(response))
                return -EIO;
 
-       *length = le32_to_cpu(t.response_pl[1]);
+       *length = le32_to_cpu(response[1]);
        dev_dbg(dev, "CDAT length %zu\n", *length);
 
        return 0;
@@ -521,51 +475,55 @@ static int cxl_cdat_get_length(struct device *dev,
 
 static int cxl_cdat_read_table(struct device *dev,
                                struct pci_doe_mb *cdat_doe,
-                              struct cxl_cdat *cdat)
+                              void *cdat_table, size_t *cdat_length)
 {
-       size_t length = cdat->length;
-       __le32 *data = cdat->table;
+       size_t length = *cdat_length + sizeof(__le32); /* bytes still free in cdat_table, which has 1 extra DW */
+       __le32 *data = cdat_table; /* where the next response (header DW first) lands */
        int entry_handle = 0;
+       __le32 saved_dw = 0; /* table DW that the next response header clobbers */
 
        do {
-               DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
+               __le32 request = CDAT_DOE_REQ(entry_handle);
                struct cdat_entry_header *entry;
                size_t entry_dw;
                int rc;
 
-               rc = pci_doe_submit_task(cdat_doe, &t.task);
+               rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL,
+                            CXL_DOE_PROTOCOL_TABLE_ACCESS,
+                            &request, sizeof(request),
+                            data, length); /* read straight into the table buffer */
                if (rc < 0) {
-                       dev_err(dev, "DOE submit failed: %d", rc);
+                       dev_err(dev, "DOE failed: %d", rc);
                        return rc;
                }
-               wait_for_completion(&t.c);
 
                /* 1 DW Table Access Response Header + CDAT entry */
-               entry = (struct cdat_entry_header *)(t.response_pl + 1);
+               entry = (struct cdat_entry_header *)(data + 1);
                if ((entry_handle == 0 &&
-                    t.task.rv != sizeof(__le32) + sizeof(struct cdat_header)) ||
+                    rc != sizeof(__le32) + sizeof(struct cdat_header)) ||
                    (entry_handle > 0 &&
-                    (t.task.rv < sizeof(__le32) + sizeof(*entry) ||
-                     t.task.rv != sizeof(__le32) + le16_to_cpu(entry->length))))
+                    (rc < sizeof(__le32) + sizeof(*entry) ||
+                     rc != sizeof(__le32) + le16_to_cpu(entry->length))))
                        return -EIO;
 
                /* Get the CXL table access header entry handle */
                entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
-                                        le32_to_cpu(t.response_pl[0]));
-               entry_dw = t.task.rv / sizeof(__le32);
+                                        le32_to_cpu(data[0]));
+               entry_dw = rc / sizeof(__le32); /* response size in DWs, header included */
                /* Skip Header */
                entry_dw -= 1;
-               entry_dw = min(length / sizeof(__le32), entry_dw);
-               /* Prevent length < 1 DW from causing a buffer overflow */
-               if (entry_dw) {
-                       memcpy(data, entry, entry_dw * sizeof(__le32));
-                       length -= entry_dw * sizeof(__le32);
-                       data += entry_dw;
-               }
+               /*
+                * Table Access Response Header overwrote the last DW of
+                * previous entry, so restore that DW
+                */
+               *data = saved_dw;
+               length -= entry_dw * sizeof(__le32); /* buffer space used by this entry */
+               data += entry_dw; /* now points at the last DW just received */
+               saved_dw = *data; /* keep it: the next header is written here */
        } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
 
        /* Length in CDAT header may exceed concatenation of CDAT entries */
-       cdat->length -= length;
+       *cdat_length -= length - sizeof(__le32); /* drop unread bytes; length still counts the extra DW */
 
        return 0;
 }
@@ -578,13 +536,19 @@ static int cxl_cdat_read_table(struct device *dev,
  */
 void read_cdat_data(struct cxl_port *port)
 {
-       struct pci_doe_mb *cdat_doe;
+       struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
+       struct device *host = cxlmd->dev.parent;
        struct device *dev = &port->dev;
-       struct device *uport = port->uport;
+       struct pci_doe_mb *cdat_doe;
        size_t cdat_length;
+       void *cdat_table;
        int rc;
 
-       cdat_doe = find_cdat_doe(uport);
+       if (!dev_is_pci(host))
+               return;
+       cdat_doe = pci_find_doe_mailbox(to_pci_dev(host),
+                                       PCI_DVSEC_VENDOR_ID_CXL,
+                                       CXL_DOE_PROTOCOL_TABLE_ACCESS);
        if (!cdat_doe) {
                dev_dbg(dev, "No CDAT mailbox\n");
                return;
@@ -597,19 +561,20 @@ void read_cdat_data(struct cxl_port *port)
                return;
        }
 
-       port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
-       if (!port->cdat.table)
+       cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32),
+                                 GFP_KERNEL);
+       if (!cdat_table)
                return;
 
-       port->cdat.length = cdat_length;
-       rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
+       rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length);
        if (rc) {
                /* Don't leave table data allocated on error */
-               devm_kfree(dev, port->cdat.table);
-               port->cdat.table = NULL;
-               port->cdat.length = 0;
+               devm_kfree(dev, cdat_table);
                dev_err(dev, "CDAT data read error\n");
        }
+
+       port->cdat.table = cdat_table + sizeof(__le32);
+       port->cdat.length = cdat_length;
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
 
index 72b889a..da20684 100644 (file)
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/memregion.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
index b2fd67f..f822de4 100644 (file)
@@ -2238,6 +2238,130 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
 }
 EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
 
+struct cxl_poison_context {
+       struct cxl_port *port;          /* port whose child decoders are walked */
+       enum cxl_decoder_mode mode;     /* mode of the decoder at commit_end */
+       u64 offset;                     /* first DPA after that decoder */
+};
+
+static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
+                                  struct cxl_poison_context *ctx)
+{
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       u64 offset, length;
+       int rc = 0;
+
+       /*
+        * Collect poison for the remaining unmapped resources
+        * after poison is collected by committed endpoints.
+        *
+        * Knowing that PMEM must always follow RAM, get poison
+        * for unmapped resources based on the last decoder's mode:
+        *      ram: scan remains of ram range, then any pmem range
+        *      pmem: scan remains of pmem range
+        */
+
+       if (ctx->mode == CXL_DECODER_RAM) {
+               offset = ctx->offset; /* resume right after the last decoder */
+               length = resource_size(&cxlds->ram_res) - offset; /* rest of ram_res */
+               rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
+               if (rc == -EFAULT) /* -EFAULT is not fatal for the ram scan */
+                       rc = 0;
+               if (rc)
+                       return rc;
+       }
+       if (ctx->mode == CXL_DECODER_PMEM) {
+               offset = ctx->offset;
+               length = resource_size(&cxlds->dpa_res) - offset; /* up to the end of dpa_res */
+               if (!length) /* nothing left after the last decoder */
+                       return 0;
+       } else if (resource_size(&cxlds->pmem_res)) { /* otherwise scan all of pmem_res, if any */
+               offset = cxlds->pmem_res.start;
+               length = resource_size(&cxlds->pmem_res);
+       } else {
+               return 0;
+       }
+
+       return cxl_mem_get_poison(cxlmd, offset, length, NULL); /* unmapped range: no region passed */
+}
+
+static int poison_by_decoder(struct device *dev, void *arg)
+{
+       struct cxl_poison_context *ctx = arg;
+       struct cxl_endpoint_decoder *cxled;
+       struct cxl_memdev *cxlmd;
+       u64 offset, length;
+       int rc = 0;
+
+       if (!is_endpoint_decoder(dev))
+               return rc;
+
+       cxled = to_cxl_endpoint_decoder(dev);
+       if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) /* no DPA behind this decoder */
+               return rc;
+
+       /*
+        * Regions are only created with single mode decoders: pmem or ram.
+        * Linux does not support mixed mode decoders. This means that
+        * reading poison per endpoint decoder adheres to the requirement
+        * that poison reads of pmem and ram must be separated.
+        * CXL 3.0 Spec 8.2.9.8.4.1
+        */
+       if (cxled->mode == CXL_DECODER_MIXED) {
+               dev_dbg(dev, "poison list read unsupported in mixed mode\n");
+               return rc;
+       }
+
+       cxlmd = cxled_to_memdev(cxled);
+       if (cxled->skip) { /* first scan the DPA skipped ahead of this decoder */
+               offset = cxled->dpa_res->start - cxled->skip;
+               length = cxled->skip;
+               rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); /* no region for the skipped range */
+               if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+                       rc = 0;
+               if (rc)
+                       return rc;
+       }
+
+       offset = cxled->dpa_res->start;
+       length = cxled->dpa_res->end - offset + 1; /* dpa_res->end is inclusive */
+       rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
+       if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) /* tolerated for ram decoders only */
+               rc = 0;
+       if (rc)
+               return rc;
+
+       /* Iterate until commit_end is reached */
+       if (cxled->cxld.id == ctx->port->commit_end) {
+               ctx->offset = cxled->dpa_res->end + 1;
+               ctx->mode = cxled->mode;
+               return 1; /* the caller tests for exactly this value */
+       }
+
+       return 0;
+}
+
+int cxl_get_poison_by_endpoint(struct cxl_port *port)
+{
+       /* Walk state; mode and offset start out zeroed */
+       struct cxl_poison_context poison_ctx = { .port = port };
+       int ret;
+
+       /* Hold the region lock across the whole scan */
+       ret = down_read_interruptible(&cxl_region_rwsem);
+       if (ret)
+               return ret;
+
+       ret = device_for_each_child(&port->dev, &poison_ctx, poison_by_decoder);
+       /* 1 means commit_end was reached: finish with the unmapped DPA */
+       if (ret == 1)
+               ret = cxl_get_poison_unmapped(to_cxl_memdev(port->uport),
+                                             &poison_ctx);
+
+       up_read(&cxl_region_rwsem);
+       return ret;
+}
+
 static struct lock_class_key cxl_pmem_region_key;
 
 static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
index 29ae7ce..d0403dc 100644 (file)
@@ -1,5 +1,99 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
 
+#include <cxl.h>
+#include "core.h"
+
 #define CREATE_TRACE_POINTS
 #include "trace.h"
+
+static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
+{
+       struct cxl_region_params *p = &cxlr->params;
+       int gran = p->interleave_granularity;
+       int ways = p->interleave_ways;
+       u64 offset;
+
+       /* Is the hpa within this region at all */
+       if (hpa < p->res->start || hpa > p->res->end) {
+               dev_dbg(&cxlr->dev,
+                       "Addr trans fail: hpa 0x%llx not in region\n", hpa);
+               return false;
+       }
+
+       /* Is the hpa in an expected chunk for its pos(-ition) */
+       offset = hpa - p->res->start;
+       offset = do_div(offset, gran * ways); /* keep the remainder: offset within one gran * ways stripe */
+       if ((offset >= pos * gran) && (offset < (pos + 1) * gran)) /* pos owns [pos * gran, (pos + 1) * gran) */
+               return true;
+
+       dev_dbg(&cxlr->dev,
+               "Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);
+
+       return false;
+}
+
+static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
+                         struct cxl_endpoint_decoder *cxled)
+{
+       u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
+       struct cxl_region_params *p = &cxlr->params;
+       int pos = cxled->pos;
+       u16 eig = 0; /* encoded interleave granularity, filled in below */
+       u8 eiw = 0; /* encoded interleave ways, filled in below */
+
+       ways_to_eiw(p->interleave_ways, &eiw);
+       granularity_to_eig(p->interleave_granularity, &eig);
+
+       /*
+        * The device position in the region interleave set was removed
+        * from the offset at HPA->DPA translation. To reconstruct the
+        * HPA, place the 'pos' in the offset.
+        *
+        * The placement of 'pos' in the HPA is determined by interleave
+        * ways and granularity and is defined in the CXL Spec 3.0 Section
+        * 8.2.4.19.13 Implementation Note: Device Decode Logic
+        */
+
+       /* Remove the dpa base */
+       dpa_offset = dpa - cxl_dpa_resource_start(cxled);
+
+       mask_upper = GENMASK_ULL(51, eig + 8); /* DPA offset bits [51:eig + 8] */
+
+       if (eiw < 8) { /* NOTE(review): presumably the power-of-2 ways encodings - confirm */
+               hpa_offset = (dpa_offset & mask_upper) << eiw; /* open an eiw-bit gap above the low bits */
+               hpa_offset |= pos << (eig + 8); /* and place pos in that gap */
+       } else {
+               bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
+               bits_upper = bits_upper * 3; /* NOTE(review): looks like the 3/6/12-way encodings - confirm */
+               hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
+       }
+
+       /* The lower bits remain unchanged */
+       hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
+
+       /* Apply the hpa_offset to the region base address */
+       hpa = hpa_offset + p->res->start;
+
+       if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos))
+               return ULLONG_MAX; /* ULLONG_MAX reports a failed translation */
+
+       return hpa;
+}
+
+u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *cxlmd,
+                 u64 dpa)
+{
+       struct cxl_region_params *params = &cxlr->params;
+
+       /* Translate through the first target that belongs to @cxlmd */
+       for (int idx = 0; idx < params->nr_targets; idx++) {
+               struct cxl_endpoint_decoder *target = params->targets[idx];
+
+               if (cxled_to_memdev(target) == cxlmd)
+                       return cxl_dpa_to_hpa(dpa, cxlr, target);
+       }
+
+       /* @cxlmd has no decoder among this region's targets */
+       return ULLONG_MAX;
+}
index 9b8d3d9..a0b5819 100644 (file)
@@ -7,10 +7,12 @@
 #define _CXL_EVENTS_H
 
 #include <linux/tracepoint.h>
+#include <linux/pci.h>
 #include <asm-generic/unaligned.h>
 
 #include <cxl.h>
 #include <cxlmem.h>
+#include "core.h"
 
 #define CXL_RAS_UC_CACHE_DATA_PARITY   BIT(0)
 #define CXL_RAS_UC_CACHE_ADDR_PARITY   BIT(1)
@@ -600,6 +602,107 @@ TRACE_EVENT(cxl_memory_module,
        )
 );
 
+#define show_poison_trace_type(type)                   \
+       __print_symbolic(type,                          \
+       { CXL_POISON_TRACE_LIST,        "List"   },     \
+       { CXL_POISON_TRACE_INJECT,      "Inject" },     \
+       { CXL_POISON_TRACE_CLEAR,       "Clear"  })
+
+#define __show_poison_source(source)                          \
+       __print_symbolic(source,                              \
+               { CXL_POISON_SOURCE_UNKNOWN,   "Unknown"  },  \
+               { CXL_POISON_SOURCE_EXTERNAL,  "External" },  \
+               { CXL_POISON_SOURCE_INTERNAL,  "Internal" },  \
+               { CXL_POISON_SOURCE_INJECTED,  "Injected" },  \
+               { CXL_POISON_SOURCE_VENDOR,    "Vendor"   })
+
+#define show_poison_source(source)                          \
+       (((source > CXL_POISON_SOURCE_INJECTED) &&           \
+        (source != CXL_POISON_SOURCE_VENDOR)) ? "Reserved"  \
+        : __show_poison_source(source))
+
+#define show_poison_flags(flags)                             \
+       __print_flags(flags, "|",                            \
+               { CXL_POISON_FLAG_MORE,      "More"     },   \
+               { CXL_POISON_FLAG_OVERFLOW,  "Overflow"  },  \
+               { CXL_POISON_FLAG_SCANNING,  "Scanning"  })
+
+#define __cxl_poison_addr(record)                                      \
+       (le64_to_cpu(record->address))
+#define cxl_poison_record_dpa(record)                                  \
+       (__cxl_poison_addr(record) & CXL_POISON_START_MASK)
+#define cxl_poison_record_source(record)                               \
+       (__cxl_poison_addr(record)  & CXL_POISON_SOURCE_MASK)
+#define cxl_poison_record_dpa_length(record)                           \
+       (le32_to_cpu(record->length) * CXL_POISON_LEN_MULT)
+#define cxl_poison_overflow(flags, time)                               \
+       (flags & CXL_POISON_FLAG_OVERFLOW ? le64_to_cpu(time) : 0)
+
+u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *memdev, u64 dpa);
+
+TRACE_EVENT(cxl_poison,
+
+       TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *region,
+                const struct cxl_poison_record *record, u8 flags,
+                __le64 overflow_ts, enum cxl_poison_trace_type trace_type),
+
+       TP_ARGS(cxlmd, region, record, flags, overflow_ts, trace_type),
+
+       TP_STRUCT__entry(
+               __string(memdev, dev_name(&cxlmd->dev))
+               __string(host, dev_name(cxlmd->dev.parent))
+               __field(u64, serial)
+               __field(u8, trace_type)
+               __string(region, region)
+               __field(u64, overflow_ts)
+               __field(u64, hpa)
+               __field(u64, dpa)
+               __field(u32, dpa_length)
+               __array(char, uuid, 16)
+               __field(u8, source)
+               __field(u8, flags)
+           ),
+
+       TP_fast_assign(
+               __assign_str(memdev, dev_name(&cxlmd->dev));
+               __assign_str(host, dev_name(cxlmd->dev.parent));
+               __entry->serial = cxlmd->cxlds->serial;
+               __entry->overflow_ts = cxl_poison_overflow(flags, overflow_ts);
+               __entry->dpa = cxl_poison_record_dpa(record);
+               __entry->dpa_length = cxl_poison_record_dpa_length(record);
+               __entry->source = cxl_poison_record_source(record);
+               __entry->trace_type = trace_type;
+               __entry->flags = flags;
+               if (region) {
+                       __assign_str(region, dev_name(&region->dev));
+                       memcpy(__entry->uuid, &region->params.uuid, 16);
+                       __entry->hpa = cxl_trace_hpa(region, cxlmd,
+                                                    __entry->dpa);
+               } else {
+                       __assign_str(region, "");
+                       memset(__entry->uuid, 0, 16);
+                       __entry->hpa = ULLONG_MAX;
+               }
+           ),
+
+       TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s "  \
+               "region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x "    \
+               "source=%s flags=%s overflow_time=%llu",
+               __get_str(memdev),
+               __get_str(host),
+               __entry->serial,
+               show_poison_trace_type(__entry->trace_type),
+               __get_str(region),
+               __entry->uuid,
+               __entry->hpa,
+               __entry->dpa,
+               __entry->dpa_length,
+               show_poison_source(__entry->source),
+               show_poison_flags(__entry->flags),
+               __entry->overflow_ts
+       )
+);
+
 #endif /* _CXL_EVENTS_H */
 
 #define TRACE_INCLUDE_FILE trace
index 090aceb..db12b63 100644 (file)
@@ -127,7 +127,7 @@ struct cxl_mbox_cmd {
 };
 
 /*
- * Per CXL 2.0 Section 8.2.8.4.5.1
+ * Per CXL 3.0 Section 8.2.8.4.5.1
  */
 #define CMD_CMD_RC_TABLE                                                       \
        C(SUCCESS, 0, NULL),                                                    \
@@ -145,14 +145,22 @@ struct cxl_mbox_cmd {
        C(FWROLLBACK, -ENXIO, "rolled back to the previous active FW"),         \
        C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"),          \
        C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"),     \
-       C(PADDR, -ENXIO, "physical address specified is invalid"),              \
+       C(PADDR, -EFAULT, "physical address specified is invalid"),             \
        C(POISONLMT, -ENXIO, "poison injection limit has been reached"),        \
        C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"),              \
        C(ABORT, -ENXIO, "background cmd was aborted by device"),               \
        C(SECURITY, -ENXIO, "not valid in the current security state"),         \
        C(PASSPHRASE, -ENXIO, "phrase doesn't match current set passphrase"),   \
        C(MBUNSUPPORTED, -ENXIO, "unsupported on the mailbox it was issued on"),\
-       C(PAYLOADLEN, -ENXIO, "invalid payload length")
+       C(PAYLOADLEN, -ENXIO, "invalid payload length"),                        \
+       C(LOG, -ENXIO, "invalid or unsupported log page"),                      \
+       C(INTERRUPTED, -ENXIO, "asynchronous event occured"),                   \
+       C(FEATUREVERSION, -ENXIO, "unsupported feature version"),               \
+       C(FEATURESELVALUE, -ENXIO, "unsupported feature selection value"),      \
+       C(FEATURETRANSFERIP, -ENXIO, "feature transfer in progress"),           \
+       C(FEATURETRANSFEROOO, -ENXIO, "feature transfer out of order"),         \
+       C(RESOURCEEXHAUSTED, -ENXIO, "resources are exhausted"),                \
+       C(EXTLIST, -ENXIO, "invalid Extent List"),                              \
 
 #undef C
 #define C(a, b, c) CXL_MBOX_CMD_RC_##a
@@ -215,6 +223,37 @@ struct cxl_event_state {
        struct mutex log_lock;
 };
 
+/* Device enabled poison commands */
+enum poison_cmd_enabled_bits {
+       CXL_POISON_ENABLED_LIST,
+       CXL_POISON_ENABLED_INJECT,
+       CXL_POISON_ENABLED_CLEAR,
+       CXL_POISON_ENABLED_SCAN_CAPS,
+       CXL_POISON_ENABLED_SCAN_MEDIA,
+       CXL_POISON_ENABLED_SCAN_RESULTS,
+       CXL_POISON_ENABLED_MAX
+};
+
+/**
+ * struct cxl_poison_state - Driver poison state info
+ *
+ * @max_errors: Maximum media error records held in device cache
+ * @enabled_cmds: All poison commands enabled in the CEL
+ * @list_out: The poison list payload returned by device
+ * @lock: Protect reads of the poison list
+ *
+ * Reads of the poison list are synchronized to ensure that a reader
+ * does not get an incomplete list because their request overlapped
+ * (was interrupted or preceded by) another read request of the same
+ * DPA range. CXL Spec 3.0 Section 8.2.9.8.4.1
+ */
+struct cxl_poison_state {
+       u32 max_errors;
+       DECLARE_BITMAP(enabled_cmds, CXL_POISON_ENABLED_MAX);
+       struct cxl_mbox_poison_out *list_out;
+       struct mutex lock;  /* Protect reads of poison list */
+};
+
 /**
  * struct cxl_dev_state - The driver device state
  *
@@ -249,8 +288,8 @@ struct cxl_event_state {
  * @component_reg_phys: register base of component registers
  * @info: Cached DVSEC information about the device.
  * @serial: PCIe Device Serial Number
- * @doe_mbs: PCI DOE mailbox array
  * @event: event log driver state
+ * @poison: poison driver state info
  * @mbox_send: @dev specific transport for transmitting mailbox commands
  *
  * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
@@ -287,9 +326,8 @@ struct cxl_dev_state {
        resource_size_t component_reg_phys;
        u64 serial;
 
-       struct xarray doe_mbs;
-
        struct cxl_event_state event;
+       struct cxl_poison_state poison;
 
        int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
 };
@@ -538,6 +576,61 @@ struct cxl_mbox_set_timestamp_in {
 
 } __packed;
 
+/* Get Poison List  CXL 3.0 Spec 8.2.9.8.4.1 */
+struct cxl_mbox_poison_in {
+       __le64 offset;
+       __le64 length;
+} __packed;
+
+struct cxl_mbox_poison_out {
+       u8 flags;
+       u8 rsvd1;
+       __le64 overflow_ts;
+       __le16 count;
+       u8 rsvd2[20];
+       struct cxl_poison_record {
+               __le64 address;
+               __le32 length;
+               __le32 rsvd;
+       } __packed record[];
+} __packed;
+
+/*
+ * Get Poison List address field encodes the starting
+ * address of poison, and the source of the poison.
+ */
+#define CXL_POISON_START_MASK          GENMASK_ULL(63, 6)
+#define CXL_POISON_SOURCE_MASK         GENMASK(2, 0)
+
+/* Get Poison List record length is in units of 64 bytes */
+#define CXL_POISON_LEN_MULT    64
+
+/* Kernel defined maximum for a list of poison errors */
+#define CXL_POISON_LIST_MAX    1024
+
+/* Get Poison List: Payload out flags */
+#define CXL_POISON_FLAG_MORE            BIT(0)
+#define CXL_POISON_FLAG_OVERFLOW        BIT(1)
+#define CXL_POISON_FLAG_SCANNING        BIT(2)
+
+/* Get Poison List: Poison Source */
+#define CXL_POISON_SOURCE_UNKNOWN      0
+#define CXL_POISON_SOURCE_EXTERNAL     1
+#define CXL_POISON_SOURCE_INTERNAL     2
+#define CXL_POISON_SOURCE_INJECTED     3
+#define CXL_POISON_SOURCE_VENDOR       7
+
+/* Inject Poison  CXL 3.0 Spec 8.2.9.8.4.2 */
+struct cxl_mbox_inject_poison {
+       __le64 address;
+};
+
+/* Clear Poison  CXL 3.0 Spec 8.2.9.8.4.3 */
+struct cxl_mbox_clear_poison {
+       __le64 address;
+       u8 write_data[CXL_POISON_LEN_MULT];
+} __packed;
+
 /**
  * struct cxl_mem_command - Driver representation of a memory device command
  * @info: Command information as it exists for the UAPI
@@ -608,6 +701,12 @@ void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds
 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
 void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
 int cxl_set_timestamp(struct cxl_dev_state *cxlds);
+int cxl_poison_state_init(struct cxl_dev_state *cxlds);
+int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
+                      struct cxl_region *cxlr);
+int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
+int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
+int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
 
 #ifdef CONFIG_CXL_SUSPEND
 void cxl_mem_active_inc(void);
index 39c4b54..10caf18 100644 (file)
@@ -94,6 +94,26 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
        return 0;
 }
 
+static int cxl_debugfs_poison_inject(void *data, u64 dpa)
+{
+       struct cxl_memdev *target = data; /* memdev given to debugfs_create_file() */
+
+       return cxl_inject_poison(target, dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
+                        cxl_debugfs_poison_inject, "%llx\n");
+
+static int cxl_debugfs_poison_clear(void *data, u64 dpa)
+{
+       struct cxl_memdev *target = data; /* memdev given to debugfs_create_file() */
+
+       return cxl_clear_poison(target, dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
+                        cxl_debugfs_poison_clear, "%llx\n");
+
 static int cxl_mem_probe(struct device *dev)
 {
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
@@ -117,6 +137,14 @@ static int cxl_mem_probe(struct device *dev)
 
        dentry = cxl_debugfs_create_dir(dev_name(dev));
        debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
+
+       if (test_bit(CXL_POISON_ENABLED_INJECT, cxlds->poison.enabled_cmds))
+               debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
+                                   &cxl_poison_inject_fops);
+       if (test_bit(CXL_POISON_ENABLED_CLEAR, cxlds->poison.enabled_cmds))
+               debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
+                                   &cxl_poison_clear_fops);
+
        rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
        if (rc)
                return rc;
@@ -176,10 +204,53 @@ unlock:
        return devm_add_action_or_reset(dev, enable_suspend, NULL);
 }
 
+static ssize_t trigger_poison_list_store(struct device *dev,
+                                        struct device_attribute *attr,
+                                        const char *buf, size_t len)
+{
+       bool requested;
+       int err;
+
+       /* Anything that does not parse as boolean true is rejected */
+       if (kstrtobool(buf, &requested) || !requested)
+               return -EINVAL;
+
+       err = cxl_trigger_poison_list(to_cxl_memdev(dev));
+       return err ? err : len;
+}
+static DEVICE_ATTR_WO(trigger_poison_list);
+
+static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(kobj_to_dev(kobj));
+
+       /* Every other attribute keeps its declared mode */
+       if (a != &dev_attr_trigger_poison_list.attr)
+               return a->mode;
+       /* Hide the trigger unless CXL_POISON_ENABLED_LIST is set */
+       return test_bit(CXL_POISON_ENABLED_LIST,
+                       cxlmd->cxlds->poison.enabled_cmds) ? a->mode : 0;
+}
+
+static struct attribute *cxl_mem_attrs[] = {
+       &dev_attr_trigger_poison_list.attr,
+       NULL
+};
+
+static struct attribute_group cxl_mem_group = {
+       .attrs = cxl_mem_attrs,
+       .is_visible = cxl_mem_visible,
+};
+
+__ATTRIBUTE_GROUPS(cxl_mem);
+
 static struct cxl_driver cxl_mem_driver = {
        .name = "cxl_mem",
        .probe = cxl_mem_probe,
        .id = CXL_DEVICE_MEMORY_EXPANDER,
+       .drv = {
+               .dev_groups = cxl_mem_groups,
+       },
 };
 
 module_cxl_driver(cxl_mem_driver);
index 60b2362..f7a5b8e 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/pci.h>
-#include <linux/pci-doe.h>
 #include <linux/aer.h>
 #include <linux/io.h>
 #include "cxlmem.h"
@@ -357,52 +356,6 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
        return rc;
 }
 
-static void cxl_pci_destroy_doe(void *mbs)
-{
-       xa_destroy(mbs);
-}
-
-static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
-{
-       struct device *dev = cxlds->dev;
-       struct pci_dev *pdev = to_pci_dev(dev);
-       u16 off = 0;
-
-       xa_init(&cxlds->doe_mbs);
-       if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
-               dev_err(dev, "Failed to create XArray for DOE's\n");
-               return;
-       }
-
-       /*
-        * Mailbox creation is best effort.  Higher layers must determine if
-        * the lack of a mailbox for their protocol is a device failure or not.
-        */
-       pci_doe_for_each_off(pdev, off) {
-               struct pci_doe_mb *doe_mb;
-
-               doe_mb = pcim_doe_create_mb(pdev, off);
-               if (IS_ERR(doe_mb)) {
-                       dev_err(dev, "Failed to create MB object for MB @ %x\n",
-                               off);
-                       continue;
-               }
-
-               if (!pci_request_config_region_exclusive(pdev, off,
-                                                        PCI_DOE_CAP_SIZEOF,
-                                                        dev_name(dev)))
-                       pci_err(pdev, "Failed to exclude DOE registers\n");
-
-               if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
-                       dev_err(dev, "xa_insert failed to insert MB @ %x\n",
-                               off);
-                       continue;
-               }
-
-               dev_dbg(dev, "Created DOE mailbox @%x\n", off);
-       }
-}
-
 /*
  * Assume that any RCIEP that emits the CXL memory expander class code
  * is an RCD
@@ -750,8 +703,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        cxlds->component_reg_phys = map.resource;
 
-       devm_cxl_pci_create_doe(cxlds);
-
        rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
                                    &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
        if (rc)
@@ -769,6 +720,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (rc)
                return rc;
 
+       rc = cxl_poison_state_init(cxlds);
+       if (rc)
+               return rc;
+
        rc = cxl_dev_state_identify(cxlds);
        if (rc)
                return rc;
index 22a7ab2..eb57324 100644 (file)
@@ -66,14 +66,22 @@ static int cxl_switch_port_probe(struct cxl_port *port)
        if (rc < 0)
                return rc;
 
-       if (rc == 1)
-               return devm_cxl_add_passthrough_decoder(port);
-
        cxlhdm = devm_cxl_setup_hdm(port, NULL);
-       if (IS_ERR(cxlhdm))
+       if (!IS_ERR(cxlhdm))
+               return devm_cxl_enumerate_decoders(cxlhdm, NULL);
+
+       if (PTR_ERR(cxlhdm) != -ENODEV) {
+               dev_err(&port->dev, "Failed to map HDM decoder capability\n");
                return PTR_ERR(cxlhdm);
+       }
+
+       if (rc == 1) {
+               dev_dbg(&port->dev, "Fallback to passthrough decoder\n");
+               return devm_cxl_add_passthrough_decoder(port);
+       }
 
-       return devm_cxl_enumerate_decoders(cxlhdm, NULL);
+       dev_err(&port->dev, "HDM decoder capability not found\n");
+       return -ENXIO;
 }
 
 static int cxl_endpoint_port_probe(struct cxl_port *port)
index e5e9b28..1b97a5a 100644 (file)
@@ -20,6 +20,8 @@
 #include <linux/pci-doe.h>
 #include <linux/workqueue.h>
 
+#include "pci.h"
+
 #define PCI_DOE_PROTOCOL_DISCOVERY 0
 
 /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
@@ -37,7 +39,7 @@
  *
  * This state is used to manage a single DOE mailbox capability.  All fields
  * should be considered opaque to the consumers and the structure passed into
- * the helpers below after being created by devm_pci_doe_create()
+ * the helpers below after being created by pci_doe_create_mb().
  *
  * @pdev: PCI device this mailbox belongs to
  * @cap_offset: Capability offset
@@ -56,6 +58,40 @@ struct pci_doe_mb {
        unsigned long flags;
 };
 
+struct pci_doe_protocol {
+       u16 vid;
+       u8 type;
+};
+
+/**
+ * struct pci_doe_task - represents a single query/response
+ *
+ * @prot: DOE Protocol
+ * @request_pl: The request payload
+ * @request_pl_sz: Size of the request payload (bytes)
+ * @response_pl: The response payload
+ * @response_pl_sz: Size of the response payload (bytes)
+ * @rv: Return value.  Length of received response or error (bytes)
+ * @complete: Called when task is complete
+ * @private: Private data for the consumer
+ * @work: Used internally by the mailbox
+ * @doe_mb: Used internally by the mailbox
+ */
+struct pci_doe_task {
+       struct pci_doe_protocol prot;
+       const __le32 *request_pl;
+       size_t request_pl_sz;
+       __le32 *response_pl;
+       size_t response_pl_sz;
+       int rv;
+       void (*complete)(struct pci_doe_task *task);
+       void *private;
+
+       /* initialized by pci_doe_submit_task() */
+       struct work_struct work;
+       struct pci_doe_mb *doe_mb;
+};
+
 static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
 {
        if (wait_event_timeout(doe_mb->wq,
@@ -110,7 +146,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
 {
        struct pci_dev *pdev = doe_mb->pdev;
        int offset = doe_mb->cap_offset;
-       size_t length;
+       size_t length, remainder;
        u32 val;
        int i;
 
@@ -128,7 +164,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
                return -EIO;
 
        /* Length is 2 DW of header + length of payload in DW */
-       length = 2 + task->request_pl_sz / sizeof(__le32);
+       length = 2 + DIV_ROUND_UP(task->request_pl_sz, sizeof(__le32));
        if (length > PCI_DOE_MAX_LENGTH)
                return -EIO;
        if (length == PCI_DOE_MAX_LENGTH)
@@ -141,10 +177,21 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
        pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
                               FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
                                          length));
+
+       /* Write payload */
        for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++)
                pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
                                       le32_to_cpu(task->request_pl[i]));
 
+       /* Write last payload dword */
+       remainder = task->request_pl_sz % sizeof(__le32);
+       if (remainder) {
+               val = 0;
+               memcpy(&val, &task->request_pl[i], remainder);
+               le32_to_cpus(&val);
+               pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
+       }
+
        pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO);
 
        return 0;
@@ -164,11 +211,11 @@ static bool pci_doe_data_obj_ready(struct pci_doe_mb *doe_mb)
 
 static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
 {
+       size_t length, payload_length, remainder, received;
        struct pci_dev *pdev = doe_mb->pdev;
        int offset = doe_mb->cap_offset;
-       size_t length, payload_length;
+       int i = 0;
        u32 val;
-       int i;
 
        /* Read the first dword to get the protocol */
        pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
@@ -195,15 +242,38 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
 
        /* First 2 dwords have already been read */
        length -= 2;
-       payload_length = min(length, task->response_pl_sz / sizeof(__le32));
-       /* Read the rest of the response payload */
-       for (i = 0; i < payload_length; i++) {
+       received = task->response_pl_sz;
+       payload_length = DIV_ROUND_UP(task->response_pl_sz, sizeof(__le32));
+       remainder = task->response_pl_sz % sizeof(__le32);
+
+       /* remainder signifies number of data bytes in last payload dword */
+       if (!remainder)
+               remainder = sizeof(__le32);
+
+       if (length < payload_length) {
+               received = length * sizeof(__le32);
+               payload_length = length;
+               remainder = sizeof(__le32);
+       }
+
+       if (payload_length) {
+               /* Read all payload dwords except the last */
+               for (; i < payload_length - 1; i++) {
+                       pci_read_config_dword(pdev, offset + PCI_DOE_READ,
+                                             &val);
+                       task->response_pl[i] = cpu_to_le32(val);
+                       pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+               }
+
+               /* Read last payload dword */
                pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
-               task->response_pl[i] = cpu_to_le32(val);
+               cpu_to_le32s(&val);
+               memcpy(&task->response_pl[i], &val, remainder);
                /* Prior to the last ack, ensure Data Object Ready */
-               if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb))
+               if (!pci_doe_data_obj_ready(doe_mb))
                        return -EIO;
                pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+               i++;
        }
 
        /* Flush excess length */
@@ -217,7 +287,7 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
        if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
                return -EIO;
 
-       return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32);
+       return received;
 }
 
 static void signal_task_complete(struct pci_doe_task *task, int rv)
@@ -321,26 +391,15 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
        __le32 request_pl_le = cpu_to_le32(request_pl);
        __le32 response_pl_le;
        u32 response_pl;
-       DECLARE_COMPLETION_ONSTACK(c);
-       struct pci_doe_task task = {
-               .prot.vid = PCI_VENDOR_ID_PCI_SIG,
-               .prot.type = PCI_DOE_PROTOCOL_DISCOVERY,
-               .request_pl = &request_pl_le,
-               .request_pl_sz = sizeof(request_pl),
-               .response_pl = &response_pl_le,
-               .response_pl_sz = sizeof(response_pl),
-               .complete = pci_doe_task_complete,
-               .private = &c,
-       };
        int rc;
 
-       rc = pci_doe_submit_task(doe_mb, &task);
+       rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_PROTOCOL_DISCOVERY,
+                    &request_pl_le, sizeof(request_pl_le),
+                    &response_pl_le, sizeof(response_pl_le));
        if (rc < 0)
                return rc;
 
-       wait_for_completion(&c);
-
-       if (task.rv != sizeof(response_pl))
+       if (rc != sizeof(response_pl_le))
                return -EIO;
 
        response_pl = le32_to_cpu(response_pl_le);
@@ -385,37 +444,18 @@ static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
        return 0;
 }
 
-static void pci_doe_xa_destroy(void *mb)
-{
-       struct pci_doe_mb *doe_mb = mb;
-
-       xa_destroy(&doe_mb->prots);
-}
-
-static void pci_doe_destroy_workqueue(void *mb)
+static void pci_doe_cancel_tasks(struct pci_doe_mb *doe_mb)
 {
-       struct pci_doe_mb *doe_mb = mb;
-
-       destroy_workqueue(doe_mb->work_queue);
-}
-
-static void pci_doe_flush_mb(void *mb)
-{
-       struct pci_doe_mb *doe_mb = mb;
-
        /* Stop all pending work items from starting */
        set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
 
        /* Cancel an in progress work item, if necessary */
        set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
        wake_up(&doe_mb->wq);
-
-       /* Flush all work items */
-       flush_workqueue(doe_mb->work_queue);
 }
 
 /**
- * pcim_doe_create_mb() - Create a DOE mailbox object
+ * pci_doe_create_mb() - Create a DOE mailbox object
  *
  * @pdev: PCI device to create the DOE mailbox for
  * @cap_offset: Offset of the DOE mailbox
@@ -426,64 +466,77 @@ static void pci_doe_flush_mb(void *mb)
  * RETURNS: created mailbox object on success
  *         ERR_PTR(-errno) on failure
  */
-struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
+static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
+                                           u16 cap_offset)
 {
        struct pci_doe_mb *doe_mb;
-       struct device *dev = &pdev->dev;
        int rc;
 
-       doe_mb = devm_kzalloc(dev, sizeof(*doe_mb), GFP_KERNEL);
+       doe_mb = kzalloc(sizeof(*doe_mb), GFP_KERNEL);
        if (!doe_mb)
                return ERR_PTR(-ENOMEM);
 
        doe_mb->pdev = pdev;
        doe_mb->cap_offset = cap_offset;
        init_waitqueue_head(&doe_mb->wq);
-
        xa_init(&doe_mb->prots);
-       rc = devm_add_action(dev, pci_doe_xa_destroy, doe_mb);
-       if (rc)
-               return ERR_PTR(rc);
 
        doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0,
-                                               dev_driver_string(&pdev->dev),
+                                               dev_bus_name(&pdev->dev),
                                                pci_name(pdev),
                                                doe_mb->cap_offset);
        if (!doe_mb->work_queue) {
                pci_err(pdev, "[%x] failed to allocate work queue\n",
                        doe_mb->cap_offset);
-               return ERR_PTR(-ENOMEM);
+               rc = -ENOMEM;
+               goto err_free;
        }
-       rc = devm_add_action_or_reset(dev, pci_doe_destroy_workqueue, doe_mb);
-       if (rc)
-               return ERR_PTR(rc);
 
        /* Reset the mailbox by issuing an abort */
        rc = pci_doe_abort(doe_mb);
        if (rc) {
                pci_err(pdev, "[%x] failed to reset mailbox with abort command : %d\n",
                        doe_mb->cap_offset, rc);
-               return ERR_PTR(rc);
+               goto err_destroy_wq;
        }
 
        /*
         * The state machine and the mailbox should be in sync now;
-        * Set up mailbox flush prior to using the mailbox to query protocols.
+        * Use the mailbox to query protocols.
         */
-       rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
-       if (rc)
-               return ERR_PTR(rc);
-
        rc = pci_doe_cache_protocols(doe_mb);
        if (rc) {
                pci_err(pdev, "[%x] failed to cache protocols : %d\n",
                        doe_mb->cap_offset, rc);
-               return ERR_PTR(rc);
+               goto err_cancel;
        }
 
        return doe_mb;
+
+err_cancel:
+       pci_doe_cancel_tasks(doe_mb);
+       xa_destroy(&doe_mb->prots);
+err_destroy_wq:
+       destroy_workqueue(doe_mb->work_queue);
+err_free:
+       kfree(doe_mb);
+       return ERR_PTR(rc);
+}
+
+/**
+ * pci_doe_destroy_mb() - Destroy a DOE mailbox object
+ *
+ * @doe_mb: DOE mailbox
+ *
+ * Destroy all internal data structures created for the DOE mailbox.
+ */
+static void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb)
+{
+       pci_doe_cancel_tasks(doe_mb);
+       xa_destroy(&doe_mb->prots);
+       destroy_workqueue(doe_mb->work_queue);
+       kfree(doe_mb);
 }
-EXPORT_SYMBOL_GPL(pcim_doe_create_mb);
 
 /**
  * pci_doe_supports_prot() - Return if the DOE instance supports the given
@@ -494,7 +547,7 @@ EXPORT_SYMBOL_GPL(pcim_doe_create_mb);
  *
  * RETURNS: True if the DOE mailbox supports the protocol specified
  */
-bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
+static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
 {
        unsigned long index;
        void *entry;
@@ -509,7 +562,6 @@ bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
 
        return false;
 }
-EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
 
 /**
  * pci_doe_submit_task() - Submit a task to be processed by the state machine
@@ -530,19 +582,12 @@ EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
  *
  * RETURNS: 0 when task has been successfully queued, -ERRNO on error
  */
-int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
+static int pci_doe_submit_task(struct pci_doe_mb *doe_mb,
+                              struct pci_doe_task *task)
 {
        if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
                return -EINVAL;
 
-       /*
-        * DOE requests must be a whole number of DW and the response needs to
-        * be big enough for at least 1 DW
-        */
-       if (task->request_pl_sz % sizeof(__le32) ||
-           task->response_pl_sz < sizeof(__le32))
-               return -EINVAL;
-
        if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
                return -EIO;
 
@@ -551,4 +596,129 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
        queue_work(doe_mb->work_queue, &task->work);
        return 0;
 }
-EXPORT_SYMBOL_GPL(pci_doe_submit_task);
+
+/**
+ * pci_doe() - Perform Data Object Exchange
+ *
+ * @doe_mb: DOE Mailbox
+ * @vendor: Vendor ID
+ * @type: Data Object Type
+ * @request: Request payload
+ * @request_sz: Size of request payload (bytes)
+ * @response: Response payload
+ * @response_sz: Size of response payload (bytes)
+ *
+ * Submit @request to @doe_mb and store the @response.
+ * The DOE exchange is performed synchronously and may therefore sleep.
+ *
+ * Payloads are treated as opaque byte streams which are transmitted verbatim,
+ * without byte-swapping.  If payloads contain little-endian register values,
+ * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
+ *
+ * For convenience, arbitrary payload sizes are allowed even though PCIe r6.0
+ * sec 6.30.1 specifies the Data Object Header 2 "Length" in dwords.  The last
+ * (partial) dword is copied with byte granularity and padded with zeroes if
+ * necessary.  Callers are thus relieved of using dword-sized bounce buffers.
+ *
+ * RETURNS: Length of received response or negative errno.
+ * Received data in excess of @response_sz is discarded.
+ * The length may be smaller than @response_sz and the caller
+ * is responsible for checking that.
+ */
+int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
+           const void *request, size_t request_sz,
+           void *response, size_t response_sz)
+{
+       DECLARE_COMPLETION_ONSTACK(c);
+       struct pci_doe_task task = {
+               .prot.vid = vendor,
+               .prot.type = type,
+               .request_pl = request,
+               .request_pl_sz = request_sz,
+               .response_pl = response,
+               .response_pl_sz = response_sz,
+               .complete = pci_doe_task_complete,
+               .private = &c,
+       };
+       int rc;
+
+       rc = pci_doe_submit_task(doe_mb, &task);
+       if (rc)
+               return rc;
+
+       wait_for_completion(&c);
+
+       return task.rv;
+}
+EXPORT_SYMBOL_GPL(pci_doe);
+
+/**
+ * pci_find_doe_mailbox() - Find Data Object Exchange mailbox
+ *
+ * @pdev: PCI device
+ * @vendor: Vendor ID
+ * @type: Data Object Type
+ *
+ * Find first DOE mailbox of a PCI device which supports the given protocol.
+ *
+ * RETURNS: Pointer to the DOE mailbox or NULL if none was found.
+ */
+struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor,
+                                       u8 type)
+{
+       struct pci_doe_mb *doe_mb;
+       unsigned long index;
+
+       xa_for_each(&pdev->doe_mbs, index, doe_mb)
+               if (pci_doe_supports_prot(doe_mb, vendor, type))
+                       return doe_mb;
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_find_doe_mailbox);
+
+void pci_doe_init(struct pci_dev *pdev)
+{
+       struct pci_doe_mb *doe_mb;
+       u16 offset = 0;
+       int rc;
+
+       xa_init(&pdev->doe_mbs);
+
+       while ((offset = pci_find_next_ext_capability(pdev, offset,
+                                                     PCI_EXT_CAP_ID_DOE))) {
+               doe_mb = pci_doe_create_mb(pdev, offset);
+               if (IS_ERR(doe_mb)) {
+                       pci_err(pdev, "[%x] failed to create mailbox: %ld\n",
+                               offset, PTR_ERR(doe_mb));
+                       continue;
+               }
+
+               rc = xa_insert(&pdev->doe_mbs, offset, doe_mb, GFP_KERNEL);
+               if (rc) {
+                       pci_err(pdev, "[%x] failed to insert mailbox: %d\n",
+                               offset, rc);
+                       pci_doe_destroy_mb(doe_mb);
+               }
+       }
+}
+
+void pci_doe_destroy(struct pci_dev *pdev)
+{
+       struct pci_doe_mb *doe_mb;
+       unsigned long index;
+
+       xa_for_each(&pdev->doe_mbs, index, doe_mb)
+               pci_doe_destroy_mb(doe_mb);
+
+       xa_destroy(&pdev->doe_mbs);
+}
+
+void pci_doe_disconnected(struct pci_dev *pdev)
+{
+       struct pci_doe_mb *doe_mb;
+       unsigned long index;
+
+       xa_for_each(&pdev->doe_mbs, index, doe_mb)
+               pci_doe_cancel_tasks(doe_mb);
+}
index 67005a0..2475098 100644 (file)
@@ -311,6 +311,16 @@ struct pci_sriov {
        bool            drivers_autoprobe; /* Auto probing of VFs by driver */
 };
 
+#ifdef CONFIG_PCI_DOE
+void pci_doe_init(struct pci_dev *pdev);
+void pci_doe_destroy(struct pci_dev *pdev);
+void pci_doe_disconnected(struct pci_dev *pdev);
+#else
+static inline void pci_doe_init(struct pci_dev *pdev) { }
+static inline void pci_doe_destroy(struct pci_dev *pdev) { }
+static inline void pci_doe_disconnected(struct pci_dev *pdev) { }
+#endif
+
 /**
  * pci_dev_set_io_state - Set the new error state if possible.
  *
@@ -347,6 +357,7 @@ static inline bool pci_dev_set_io_state(struct pci_dev *dev,
 static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
 {
        pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
+       pci_doe_disconnected(dev);
 
        return 0;
 }
index 9123baf..0b2826c 100644 (file)
@@ -2479,6 +2479,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
        pci_aer_init(dev);              /* Advanced Error Reporting */
        pci_dpc_init(dev);              /* Downstream Port Containment */
        pci_rcec_init(dev);             /* Root Complex Event Collector */
+       pci_doe_init(dev);              /* Data Object Exchange */
 
        pcie_report_downtraining(dev);
        pci_init_reset_methods(dev);
index 30a787d..d68aee2 100644 (file)
@@ -38,6 +38,7 @@ static void pci_destroy_dev(struct pci_dev *dev)
        list_del(&dev->bus_list);
        up_write(&pci_bus_sem);
 
+       pci_doe_destroy(dev);
        pcie_aspm_exit_link_state(dev);
        pci_bridge_d3_update(dev);
        pci_free_resources(dev);
index 43765ea..1f14aed 100644 (file)
 #ifndef LINUX_PCI_DOE_H
 #define LINUX_PCI_DOE_H
 
-struct pci_doe_protocol {
-       u16 vid;
-       u8 type;
-};
-
 struct pci_doe_mb;
 
-/**
- * struct pci_doe_task - represents a single query/response
- *
- * @prot: DOE Protocol
- * @request_pl: The request payload
- * @request_pl_sz: Size of the request payload (bytes)
- * @response_pl: The response payload
- * @response_pl_sz: Size of the response payload (bytes)
- * @rv: Return value.  Length of received response or error (bytes)
- * @complete: Called when task is complete
- * @private: Private data for the consumer
- * @work: Used internally by the mailbox
- * @doe_mb: Used internally by the mailbox
- *
- * Payloads are treated as opaque byte streams which are transmitted verbatim,
- * without byte-swapping.  If payloads contain little-endian register values,
- * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
- *
- * The payload sizes and rv are specified in bytes with the following
- * restrictions concerning the protocol.
- *
- *     1) The request_pl_sz must be a multiple of double words (4 bytes)
- *     2) The response_pl_sz must be >= a single double word (4 bytes)
- *     3) rv is returned as bytes but it will be a multiple of double words
- *
- * NOTE there is no need for the caller to initialize work or doe_mb.
- */
-struct pci_doe_task {
-       struct pci_doe_protocol prot;
-       __le32 *request_pl;
-       size_t request_pl_sz;
-       __le32 *response_pl;
-       size_t response_pl_sz;
-       int rv;
-       void (*complete)(struct pci_doe_task *task);
-       void *private;
-
-       /* No need for the user to initialize these fields */
-       struct work_struct work;
-       struct pci_doe_mb *doe_mb;
-};
-
-/**
- * pci_doe_for_each_off - Iterate each DOE capability
- * @pdev: struct pci_dev to iterate
- * @off: u16 of config space offset of each mailbox capability found
- */
-#define pci_doe_for_each_off(pdev, off) \
-       for (off = pci_find_next_ext_capability(pdev, off, \
-                                       PCI_EXT_CAP_ID_DOE); \
-               off > 0; \
-               off = pci_find_next_ext_capability(pdev, off, \
-                                       PCI_EXT_CAP_ID_DOE))
+struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor,
+                                       u8 type);
 
-struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
-bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
-int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
+int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
+           const void *request, size_t request_sz,
+           void *response, size_t response_sz);
 
 #endif
index 0b57e37..60b8772 100644 (file)
@@ -511,6 +511,9 @@ struct pci_dev {
 #endif
 #ifdef CONFIG_PCI_P2PDMA
        struct pci_p2pdma __rcu *p2pdma;
+#endif
+#ifdef CONFIG_PCI_DOE
+       struct xarray   doe_mbs;        /* Data Object Exchange mailboxes */
 #endif
        u16             acs_cap;        /* ACS Capability offset */
        phys_addr_t     rom;            /* Physical address if not from BAR */
index 86bbacf..14bc6e7 100644 (file)
        ___C(SET_ALERT_CONFIG, "Set Alert Configuration"),                \
        ___C(GET_SHUTDOWN_STATE, "Get Shutdown State"),                   \
        ___C(SET_SHUTDOWN_STATE, "Set Shutdown State"),                   \
-       ___C(GET_POISON, "Get Poison List"),                              \
-       ___C(INJECT_POISON, "Inject Poison"),                             \
-       ___C(CLEAR_POISON, "Clear Poison"),                               \
+       ___DEPRECATED(GET_POISON, "Get Poison List"),                     \
+       ___DEPRECATED(INJECT_POISON, "Inject Poison"),                    \
+       ___DEPRECATED(CLEAR_POISON, "Clear Poison"),                      \
        ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"),         \
-       ___C(SCAN_MEDIA, "Scan Media"),                                   \
-       ___C(GET_SCAN_MEDIA, "Get Scan Media Results"),                   \
+       ___DEPRECATED(SCAN_MEDIA, "Scan Media"),                          \
+       ___DEPRECATED(GET_SCAN_MEDIA, "Get Scan Media Results"),          \
        ___C(MAX, "invalid / last command")
 
 #define ___C(a, b) CXL_MEM_COMMAND_ID_##a
+#define ___DEPRECATED(a, b) CXL_MEM_DEPRECATED_ID_##a
 enum { CXL_CMDS };
 
 #undef ___C
+#undef ___DEPRECATED
 #define ___C(a, b) { b }
+#define ___DEPRECATED(a, b) { "Deprecated " b }
 static const struct {
        const char *name;
 } cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS };
@@ -68,6 +71,28 @@ static const struct {
  */
 
 #undef ___C
+#undef ___DEPRECATED
+#define ___C(a, b) (0)
+#define ___DEPRECATED(a, b) (1)
+
+static const __u8 cxl_deprecated_commands[]
+       __attribute__((__unused__)) = { CXL_CMDS };
+
+/*
+ * Here's how this actually breaks out:
+ * cxl_deprecated_commands[] = {
+ *     [CXL_MEM_COMMAND_ID_INVALID] = 0,
+ *     [CXL_MEM_COMMAND_ID_IDENTIFY] = 0,
+ *     ...
+ *     [CXL_MEM_DEPRECATED_ID_GET_POISON] = 1,
+ *     [CXL_MEM_DEPRECATED_ID_INJECT_POISON] = 1,
+ *     [CXL_MEM_DEPRECATED_ID_CLEAR_POISON] = 1,
+ *     ...
+ * };
+ */
+
+#undef ___C
+#undef ___DEPRECATED
 
 /**
  * struct cxl_command_info - Command information returned from a query.
index 99b56b5..0902c5d 100644 (file)
@@ -13,4 +13,5 @@ void check(void)
        BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM));
        BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST));
        BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST));
+       BUILD_BUG_ON(!IS_ENABLED(CONFIG_DEBUG_FS));
 }
index 9263b04..ba572d0 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/bits.h>
+#include <asm/unaligned.h>
 #include <cxlmem.h>
 
 #include "trace.h"
 #define DEV_SIZE SZ_2G
 #define EFFECT(x) (1U << x)
 
+#define MOCK_INJECT_DEV_MAX 8
+#define MOCK_INJECT_TEST_MAX 128
+
+static unsigned int poison_inject_dev_max = MOCK_INJECT_DEV_MAX;
+
 static struct cxl_cel_entry mock_cel[] = {
        {
                .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_LOGS),
@@ -40,6 +46,18 @@ static struct cxl_cel_entry mock_cel[] = {
                .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO),
                .effect = cpu_to_le16(0),
        },
+       {
+               .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON),
+               .effect = cpu_to_le16(0),
+       },
+       {
+               .opcode = cpu_to_le16(CXL_MBOX_OP_INJECT_POISON),
+               .effect = cpu_to_le16(0),
+       },
+       {
+               .opcode = cpu_to_le16(CXL_MBOX_OP_CLEAR_POISON),
+               .effect = cpu_to_le16(0),
+       },
 };
 
 /* See CXL 2.0 Table 181 Get Health Info Output Payload */
@@ -98,6 +116,7 @@ struct cxl_mockmem_data {
        int master_limit;
        struct mock_event_store mes;
        u8 event_buf[SZ_4K];
+       u64 timestamp;
 };
 
 static struct mock_event_log *event_find_log(struct device *dev, int log_type)
@@ -361,6 +380,22 @@ struct cxl_event_mem_module mem_module = {
        }
 };
 
+static int mock_set_timestamp(struct cxl_dev_state *cxlds,
+                             struct cxl_mbox_cmd *cmd)
+{
+       struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
+       struct cxl_mbox_set_timestamp_in *ts = cmd->payload_in;
+
+       if (cmd->size_in != sizeof(*ts))
+               return -EINVAL;
+
+       if (cmd->size_out != 0)
+               return -EINVAL;
+
+       mdata->timestamp = le64_to_cpu(ts->timestamp);
+       return 0;
+}
+
 static void cxl_mock_add_event_logs(struct mock_event_store *mes)
 {
        put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK,
@@ -469,8 +504,11 @@ static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
                        cpu_to_le64(SZ_256M / CXL_CAPACITY_MULTIPLIER),
                .total_capacity =
                        cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER),
+               .inject_poison_limit = cpu_to_le16(MOCK_INJECT_TEST_MAX),
        };
 
+       put_unaligned_le24(CXL_POISON_LIST_MAX, id.poison_list_max_mer);
+
        if (cmd->size_out < sizeof(id))
                return -EINVAL;
 
@@ -888,12 +926,203 @@ static int mock_health_info(struct cxl_dev_state *cxlds,
        return 0;
 }
 
+static struct mock_poison {
+       struct cxl_dev_state *cxlds;
+       u64 dpa;
+} mock_poison_list[MOCK_INJECT_TEST_MAX];
+
+static struct cxl_mbox_poison_out *
+cxl_get_injected_po(struct cxl_dev_state *cxlds, u64 offset, u64 length)
+{
+       struct cxl_mbox_poison_out *po;
+       int nr_records = 0;
+       u64 dpa;
+
+       po = kzalloc(struct_size(po, record, poison_inject_dev_max), GFP_KERNEL);
+       if (!po)
+               return NULL;
+
+       for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+               if (mock_poison_list[i].cxlds != cxlds)
+                       continue;
+               if (mock_poison_list[i].dpa < offset ||
+                   mock_poison_list[i].dpa > offset + length - 1)
+                       continue;
+
+               dpa = mock_poison_list[i].dpa + CXL_POISON_SOURCE_INJECTED;
+               po->record[nr_records].address = cpu_to_le64(dpa);
+               po->record[nr_records].length = cpu_to_le32(1);
+               nr_records++;
+               if (nr_records == poison_inject_dev_max)
+                       break;
+       }
+
+       /* Always return count, even when zero */
+       po->count = cpu_to_le16(nr_records);
+
+       return po;
+}
+
+static int mock_get_poison(struct cxl_dev_state *cxlds,
+                          struct cxl_mbox_cmd *cmd)
+{
+       struct cxl_mbox_poison_in *pi = cmd->payload_in;
+       struct cxl_mbox_poison_out *po;
+       u64 offset = le64_to_cpu(pi->offset);
+       u64 length = le64_to_cpu(pi->length);
+       int nr_records;
+
+       po = cxl_get_injected_po(cxlds, offset, length);
+       if (!po)
+               return -ENOMEM;
+       nr_records = le16_to_cpu(po->count);
+       memcpy(cmd->payload_out, po, struct_size(po, record, nr_records));
+       cmd->size_out = struct_size(po, record, nr_records);
+       kfree(po);
+
+       return 0;
+}
+
+static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds)
+{
+       int count = 0;
+
+       for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+               if (mock_poison_list[i].cxlds == cxlds)
+                       count++;
+       }
+       return (count >= poison_inject_dev_max);
+}
+
+/* Track @dpa as poisoned for @cxlds; false once either limit is reached */
+static bool mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa)
+{
+       if (mock_poison_dev_max_injected(cxlds)) {
+               /* Log the limit in force, not its compile-time default */
+               dev_dbg(cxlds->dev,
+                       "Device poison injection limit has been reached: %u\n",
+                       poison_inject_dev_max);
+               return false;
+       }
+       for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+               if (!mock_poison_list[i].cxlds) {
+                       mock_poison_list[i].cxlds = cxlds;
+                       mock_poison_list[i].dpa = dpa;
+                       return true;
+               }
+       }
+       dev_dbg(cxlds->dev,
+               "Mock test poison injection limit has been reached: %d\n",
+               MOCK_INJECT_TEST_MAX);
+       return false;
+}
+
+static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa)
+{
+       for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+               if (mock_poison_list[i].cxlds == cxlds &&
+                   mock_poison_list[i].dpa == dpa)
+                       return true;
+       }
+       return false;
+}
+
+static int mock_inject_poison(struct cxl_dev_state *cxlds,
+                             struct cxl_mbox_cmd *cmd)
+{
+       struct cxl_mbox_inject_poison *pi = cmd->payload_in;
+       u64 dpa = le64_to_cpu(pi->address);
+
+       if (mock_poison_found(cxlds, dpa)) {
+               /* Not an error to inject poison if already poisoned */
+               dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa);
+               return 0;
+       }
+       if (!mock_poison_add(cxlds, dpa))
+               return -ENXIO;
+
+       return 0;
+}
+
+static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa)
+{
+       for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+               if (mock_poison_list[i].cxlds == cxlds &&
+                   mock_poison_list[i].dpa == dpa) {
+                       mock_poison_list[i].cxlds = NULL;
+                       return true;
+               }
+       }
+       return false;
+}
+
+static int mock_clear_poison(struct cxl_dev_state *cxlds,
+                            struct cxl_mbox_cmd *cmd)
+{
+       struct cxl_mbox_clear_poison *pi = cmd->payload_in;
+       u64 dpa = le64_to_cpu(pi->address);
+
+       /*
+        * A real CXL device will write pi->write_data to the address
+        * being cleared. In this mock, just delete this address from
+        * the mock poison list.
+        */
+       if (!mock_poison_del(cxlds, dpa))
+               dev_dbg(cxlds->dev, "DPA: 0x%llx not in poison list\n", dpa);
+
+       return 0;
+}
+
+static bool mock_poison_list_empty(void)
+{
+       for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+               if (mock_poison_list[i].cxlds)
+                       return false;
+       }
+       return true;
+}
+
+static ssize_t poison_inject_max_show(struct device_driver *drv, char *buf)
+{
+       return sysfs_emit(buf, "%u\n", poison_inject_dev_max);
+}
+
+/* Set the per-device inject limit; refused while mock poison is recorded */
+static ssize_t poison_inject_max_store(struct device_driver *drv,
+                                      const char *buf, size_t len)
+{
+       int val;
+
+       if (kstrtoint(buf, 0, &val) < 0)
+               return -EINVAL;
+       if (!mock_poison_list_empty())
+               return -EBUSY;
+
+       /* A negative value would wrap once stored in the unsigned limit */
+       if (val < 0 || val > MOCK_INJECT_TEST_MAX)
+               return -EINVAL;
+
+       poison_inject_dev_max = val;
+       return len;
+}
+
+static DRIVER_ATTR_RW(poison_inject_max);
+
+static struct attribute *cxl_mock_mem_core_attrs[] = {
+       &driver_attr_poison_inject_max.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(cxl_mock_mem_core);
+
 static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
 {
        struct device *dev = cxlds->dev;
        int rc = -EIO;
 
        switch (cmd->opcode) {
+       case CXL_MBOX_OP_SET_TIMESTAMP:
+               rc = mock_set_timestamp(cxlds, cmd);
+               break;
        case CXL_MBOX_OP_GET_SUPPORTED_LOGS:
                rc = mock_gsl(cmd);
                break;
@@ -942,6 +1171,15 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
        case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
                rc = mock_passphrase_secure_erase(cxlds, cmd);
                break;
+       case CXL_MBOX_OP_GET_POISON:
+               rc = mock_get_poison(cxlds, cmd);
+               break;
+       case CXL_MBOX_OP_INJECT_POISON:
+               rc = mock_inject_poison(cxlds, cmd);
+               break;
+       case CXL_MBOX_OP_CLEAR_POISON:
+               rc = mock_clear_poison(cxlds, cmd);
+               break;
        default:
                break;
        }
@@ -1010,6 +1248,14 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
        if (rc)
                return rc;
 
+       rc = cxl_poison_state_init(cxlds);
+       if (rc)
+               return rc;
+
+       rc = cxl_set_timestamp(cxlds);
+       if (rc)
+               return rc;
+
        rc = cxl_dev_state_identify(cxlds);
        if (rc)
                return rc;
@@ -1083,6 +1329,7 @@ static struct platform_driver cxl_mock_mem_driver = {
        .driver = {
                .name = KBUILD_MODNAME,
                .dev_groups = cxl_mock_mem_groups,
+               .groups = cxl_mock_mem_core_groups,
        },
 };