Merge tag 'cxl-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 16 Mar 2024 17:04:12 +0000 (10:04 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 16 Mar 2024 17:04:12 +0000 (10:04 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Mar 2024 17:04:12 +0000 (10:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Mar 2024 17:04:12 +0000 (10:04 -0700)
diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl

index fe61d37..c61f9b8 100644 (file)
--- a/Documentation/ABI/testing/debugfs-cxl
+++ b/Documentation/ABI/testing/debugfs-cxl
@@ -33,3 +33,37 @@ Description:
                 device cannot clear poison from the address, -ENXIO is returned.
                 The clear_poison attribute is only visible for devices
                 supporting the capability.
+
+What:          /sys/kernel/debug/cxl/einj_types
+Date:          January, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) Prints the CXL protocol error types made available by
+               the platform in the format:
+
+                       0x<error number> <error type>
+
+               The possible error types are (as of ACPI v6.5):
+
+                       0x1000  CXL.cache Protocol Correctable
+                       0x2000  CXL.cache Protocol Uncorrectable non-fatal
+                       0x4000  CXL.cache Protocol Uncorrectable fatal
+                       0x8000  CXL.mem Protocol Correctable
+                       0x10000 CXL.mem Protocol Uncorrectable non-fatal
+                       0x20000 CXL.mem Protocol Uncorrectable fatal
+
+               The <error number> can be written to einj_inject to inject
+               <error type> into a chosen dport.
+
+What:          /sys/kernel/debug/cxl/$dport_dev/einj_inject
+Date:          January, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (WO) Writing an integer to this file injects the corresponding
+               CXL protocol error into $dport_dev ($dport_dev will be a device
+               name from /sys/bus/pci/devices). The integer to type mapping for
+               injection can be found by reading from einj_types. If the dport
+               was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
+               a CXL 2.0 error is injected.
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl

index fff2581..3f5627a 100644 (file)
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -552,3 +552,37 @@ Description:
                 attribute is only visible for devices supporting the
                 capability. The retrieved errors are logged as kernel
                 events when cxl_poison event tracing is enabled.
+
+
+What:          /sys/bus/cxl/devices/regionZ/accessY/read_bandwidth
+               /sys/bus/cxl/devices/regionZ/accessY/write_bandwidth
+Date:          Jan, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) The aggregated read or write bandwidth of the region. The
+               number is the accumulated read or write bandwidth of all CXL memory
+               devices that contribute to the region in MB/s. It is the
+               same data that should appear in
+               /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth or
+               /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth.
+               See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+               the number to the closest initiator and access1 provides the
+               number to the closest CPU.
+
+
+What:          /sys/bus/cxl/devices/regionZ/accessY/read_latency
+               /sys/bus/cxl/devices/regionZ/accessY/write_latency
+Date:          Jan, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) The read or write latency of the region. The number is
+               the worst read or write latency of all CXL memory devices that
+               contribute to the region in nanoseconds. It is the same data
+               that should appear in
+               /sys/devices/system/node/nodeX/accessY/initiators/read_latency or
+               /sys/devices/system/node/nodeX/accessY/initiators/write_latency.
+               See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+               the number to the closest initiator and access1 provides the
+               number to the closest CPU.
diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst

index d6b61d2..c52b9da 100644 (file)
--- a/Documentation/firmware-guide/acpi/apei/einj.rst
+++ b/Documentation/firmware-guide/acpi/apei/einj.rst
@@ -32,6 +32,10 @@ configuration::
    CONFIG_ACPI_APEI
    CONFIG_ACPI_APEI_EINJ
  
+...and to (optionally) enable CXL protocol error injection set::
+
+  CONFIG_ACPI_APEI_EINJ_CXL
+
  The EINJ user interface is in <debugfs mount point>/apei/einj.
  
  The following files belong to it:
@@ -118,6 +122,24 @@ The following files belong to it:
    this actually works depends on what operations the BIOS actually
    includes in the trigger phase.
  
+CXL error types are supported from ACPI 6.5 onwards (given a CXL port
+is present). The EINJ user interface for CXL error types is at
+<debugfs mount point>/cxl. The following files belong to it:
+
+- einj_types:
+
+  Provides the same functionality as available_error_types above, but
+  for CXL error types.
+
+- $dport_dev/einj_inject:
+
+  Injects a CXL error type into the CXL port represented by $dport_dev,
+  where $dport_dev is the name of the CXL port (usually a PCIe device name).
+  Error injections targeting a CXL 2.0+ port can use the legacy interface
+  under <debugfs mount point>/apei/einj, while CXL 1.1/1.0 port injections
+  must use this file.
+
+
  BIOS versions based on the ACPI 4.0 specification have limited options
  in controlling where the errors are injected. Your BIOS may support an
  extension (enabled with the param_extension=1 module parameter, or boot
@@ -181,6 +203,18 @@ You should see something like this in dmesg::
    [22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
    [22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 -  area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
  
+A CXL error injection example with $dport_dev=0000:e0:01.1::
+
+    # cd /sys/kernel/debug/cxl/
+    # ls
+    0000:e0:01.1 0000:0c:00.0
+    # cat einj_types                # See which errors can be injected
+       0x00008000  CXL.mem Protocol Correctable
+       0x00010000  CXL.mem Protocol Uncorrectable non-fatal
+       0x00020000  CXL.mem Protocol Uncorrectable fatal
+    # cd 0000:e0:01.1               # Navigate to dport to inject into
+    # echo 0x8000 > einj_inject     # Inject error
+
  Special notes for injection into SGX enclaves:
  
  There may be a separate BIOS setup option to enable SGX injection.
diff --git a/MAINTAINERS b/MAINTAINERS

index 40489f0..be59443 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5321,6 +5321,7 @@ M:        Dan Williams <dan.j.williams@intel.com>
  L:     linux-cxl@vger.kernel.org
  S:     Maintained
  F:     drivers/cxl/
+F:     include/linux/cxl-einj.h
  F:     include/linux/cxl-event.h
  F:     include/uapi/linux/cxl_mem.h
  F:     tools/testing/cxl/
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig

index 6b18f8b..3cfe7e7 100644 (file)
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -60,6 +60,19 @@ config ACPI_APEI_EINJ
           mainly used for debugging and testing the other parts of
           APEI and some other RAS features.
  
+config ACPI_APEI_EINJ_CXL
+       bool "CXL Error INJection Support"
+       default ACPI_APEI_EINJ
+       depends on ACPI_APEI_EINJ
+       depends on CXL_BUS && CXL_BUS <= ACPI_APEI_EINJ
+       help
+         Support for CXL protocol Error INJection through debugfs/cxl.
+         Availability and which errors are supported is dependent on
+         the host platform. Look to ACPI v6.5 section 18.6.4 and kernel
+         EINJ documentation for more information.
+
+         If unsure say 'n'
+
  config ACPI_APEI_ERST_DEBUG
         tristate "APEI Error Record Serialization Table (ERST) Debug Support"
         depends on ACPI_APEI
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile

index 4dfac21..2c474e6 100644 (file)
--- a/drivers/acpi/apei/Makefile
+++ b/drivers/acpi/apei/Makefile
@@ -2,6 +2,8 @@
  obj-$(CONFIG_ACPI_APEI)                += apei.o
  obj-$(CONFIG_ACPI_APEI_GHES)   += ghes.o
  obj-$(CONFIG_ACPI_APEI_EINJ)   += einj.o
+einj-y                         := einj-core.o
+einj-$(CONFIG_ACPI_APEI_EINJ_CXL) += einj-cxl.o
  obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
  
  apei-y := apei-base.o hest.o erst.o bert.o
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h

index 67c2c3b..cd2766c 100644 (file)
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -130,4 +130,22 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
  }
  
  int apei_osc_setup(void);
+
+int einj_get_available_error_type(u32 *type);
+int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
+                     u64 param4);
+int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+                             u64 param3, u64 param4);
+bool einj_is_cxl_error_type(u64 type);
+int einj_validate_error_type(u64 type);
+
+/*
+ * CXL protocol error type bits.  These are fallback definitions: they
+ * are only provided when ACPI_EINJ_CXL_CACHE_CORRECTABLE has not already
+ * been defined by an earlier header.
+ */
+#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE
+#define ACPI_EINJ_CXL_CACHE_CORRECTABLE     BIT(12)
+#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE   BIT(13)
+#define ACPI_EINJ_CXL_CACHE_FATAL           BIT(14)
+#define ACPI_EINJ_CXL_MEM_CORRECTABLE       BIT(15)
+#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE     BIT(16)
+#define ACPI_EINJ_CXL_MEM_FATAL             BIT(17)
+#endif
+
  #endif
diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c

new file mode 100644 (file)

index 0000000..66e7f52
--- /dev/null
+++ b/drivers/acpi/apei/einj-core.c
@@ -0,0 +1,914 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Error INJection support
+ *
+ * EINJ provides a hardware error injection mechanism, this is useful
+ * for debugging and testing of other APEI and RAS features.
+ *
+ * For more information about EINJ, please refer to ACPI Specification
+ * version 4.0, section 17.5.
+ *
+ * Copyright 2009-2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
+#include <asm/unaligned.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "EINJ: " fmt
+
+#define SLEEP_UNIT_MIN         1000                    /* 1ms */
+#define SLEEP_UNIT_MAX         5000                    /* 5ms */
+/* Firmware should respond within 1 second */
+#define FIRMWARE_TIMEOUT       (1 * USEC_PER_SEC)
+#define ACPI5_VENDOR_BIT       BIT(31)
+#define MEM_ERROR_MASK         (ACPI_EINJ_MEMORY_CORRECTABLE | \
+                               ACPI_EINJ_MEMORY_UNCORRECTABLE | \
+                               ACPI_EINJ_MEMORY_FATAL)
+#define CXL_ERROR_MASK         (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \
+                               ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \
+                               ACPI_EINJ_CXL_CACHE_FATAL | \
+                               ACPI_EINJ_CXL_MEM_CORRECTABLE | \
+                               ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \
+                               ACPI_EINJ_CXL_MEM_FATAL)
+
+/*
+ * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
+ */
+static int acpi5;
+
+/*
+ * Parameter block used with the SET_ERROR_TYPE_WITH_ADDRESS action.
+ * einj_get_parameter_address() maps a structure of this type and
+ * __einj_error_inject() fills it in (through einj_param) when acpi5 is
+ * set.
+ */
+struct set_error_type_with_address {
+       u32     type;
+       u32     vendor_extension;
+       u32     flags;
+       u32     apicid;
+       u64     memory_address;
+       u64     memory_address_range;
+       u32     pcie_sbdf;
+};
+/*
+ * Values for set_error_type_with_address.flags.  __einj_error_inject()
+ * pairs SETWA_FLAGS_APICID with the apicid field, SETWA_FLAGS_MEM with
+ * memory_address/memory_address_range and SETWA_FLAGS_PCIE_SBDF with
+ * pcie_sbdf.
+ */
+enum {
+       SETWA_FLAGS_APICID = 1,
+       SETWA_FLAGS_MEM = 2,
+       SETWA_FLAGS_PCIE_SBDF = 4,
+};
+
+/*
+ * Vendor extensions for platform specific operations
+ *
+ * check_vendor_extension() maps this header at the offset given by
+ * set_error_type_with_address.vendor_extension and formats pcie_sbdf,
+ * vendor_id, device_id and rev_id into vendor_dev.  Any bytes that
+ * "length" reports beyond this header are mapped by
+ * get_oem_vendor_struct() and published through vendor_errors.
+ */
+struct vendor_error_type_extension {
+       u32     length;
+       u32     pcie_sbdf;
+       u16     vendor_id;
+       u16     device_id;
+       u8      rev_id;
+       u8      reserved[3];
+};
+
+static u32 notrigger;
+
+static u32 vendor_flags;
+static struct debugfs_blob_wrapper vendor_blob;
+static struct debugfs_blob_wrapper vendor_errors;
+static char vendor_dev[64];
+
+/*
+ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
+ * EINJ table through an unpublished extension. Use with caution as
+ * most will ignore the parameter and make their own choice of address
+ * for error injection.  This extension is used only if
+ * param_extension module parameter is specified.
+ *
+ * einj_get_parameter_address() refuses to use the block when either
+ * reserved1 or reserved2 is non-zero.  __einj_error_inject() writes
+ * only param1 and param2.
+ */
+struct einj_parameter {
+       u64 type;
+       u64 reserved1;
+       u64 reserved2;
+       u64 param1;
+       u64 param2;
+};
+
+#define EINJ_OP_BUSY                   0x1
+#define EINJ_STATUS_SUCCESS            0x0
+#define EINJ_STATUS_FAIL               0x1
+#define EINJ_STATUS_INVAL              0x2
+
+#define EINJ_TAB_ENTRY(tab)                                            \
+       ((struct acpi_whea_header *)((char *)(tab) +                    \
+                                   sizeof(struct acpi_table_einj)))
+
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
+static struct acpi_table_einj *einj_tab;
+
+static struct apei_resources einj_resources;
+
+/*
+ * Instruction table handed to apei_exec_ctx_init(): one entry per
+ * ACPI_EINJ_* instruction code, giving the handler to run and whether
+ * the instruction accesses a register.
+ */
+static struct apei_exec_ins_type einj_ins_type[] = {
+       [ACPI_EINJ_READ_REGISTER] = {
+               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+               .run   = apei_exec_read_register,
+       },
+       [ACPI_EINJ_READ_REGISTER_VALUE] = {
+               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+               .run   = apei_exec_read_register_value,
+       },
+       [ACPI_EINJ_WRITE_REGISTER] = {
+               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+               .run   = apei_exec_write_register,
+       },
+       [ACPI_EINJ_WRITE_REGISTER_VALUE] = {
+               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+               .run   = apei_exec_write_register_value,
+       },
+       [ACPI_EINJ_NOOP] = {
+               .flags = 0,
+               .run   = apei_exec_noop,
+       },
+};
+
+/*
+ * Prevent the EINJ interpreter from running concurrently, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ */
+static DEFINE_MUTEX(einj_mutex);
+
+/*
+ * Exported APIs use this flag to exit early if einj_probe() failed.
+ */
+bool einj_initialized __ro_after_init;
+
+static void *einj_param;
+
+/*
+ * Prepare @ctx to interpret the action entries that follow the EINJ
+ * table header, using the einj_ins_type instruction table.
+ */
+static void einj_exec_ctx_init(struct apei_exec_context *ctx)
+{
+       struct acpi_whea_header *actions = EINJ_TAB_ENTRY(einj_tab);
+
+       apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
+                          actions, einj_tab->entries);
+}
+
+/*
+ * Run the GET_ERROR_TYPE action and, on success, store its output in
+ * *type.  Takes no lock itself; einj_get_available_error_type() calls it
+ * with einj_mutex held.
+ *
+ * Returns 0 on success or the error reported by apei_exec_run(), in
+ * which case *type is left untouched.
+ */
+static int __einj_get_available_error_type(u32 *type)
+{
+       struct apei_exec_context exec;
+       int ret;
+
+       einj_exec_ctx_init(&exec);
+
+       ret = apei_exec_run(&exec, ACPI_EINJ_GET_ERROR_TYPE);
+       if (!ret)
+               *type = apei_exec_ctx_get_output(&exec);
+
+       return ret;
+}
+
+/*
+ * Get error injection capabilities of the platform.
+ *
+ * Locked wrapper around __einj_get_available_error_type(): the query is
+ * performed with einj_mutex held and its return value is passed back
+ * unchanged.
+ */
+int einj_get_available_error_type(u32 *type)
+{
+       int ret;
+
+       mutex_lock(&einj_mutex);
+       ret = __einj_get_available_error_type(type);
+       mutex_unlock(&einj_mutex);
+
+       return ret;
+}
+
+/*
+ * Consume one polling interval from the remaining timeout budget *t.
+ *
+ * While at least SLEEP_UNIT_MIN is left, the budget is reduced by
+ * SLEEP_UNIT_MIN, the caller is put to sleep for
+ * SLEEP_UNIT_MIN..SLEEP_UNIT_MAX microseconds and 0 is returned.
+ * Otherwise a firmware warning is logged and 1 is returned.
+ */
+static int einj_timedout(u64 *t)
+{
+       s64 remaining = (s64)*t;
+
+       if (remaining >= SLEEP_UNIT_MIN) {
+               *t -= SLEEP_UNIT_MIN;
+               usleep_range(SLEEP_UNIT_MIN, SLEEP_UNIT_MAX);
+               return 0;
+       }
+
+       pr_warn(FW_WARN "Firmware does not respond in time\n");
+       return 1;
+}
+
+/*
+ * Map the vendor-specific data that trails the vendor extension header
+ * and publish it through vendor_errors.
+ *
+ * @paddr:  physical address the extension offset is relative to
+ * @offset: offset of the vendor extension header from @paddr
+ * @v:      mapped vendor extension header
+ *
+ * The payload starts right after the header and is
+ * v->length - sizeof(header) bytes long.
+ */
+static void get_oem_vendor_struct(u64 paddr, int offset,
+                                 struct vendor_error_type_extension *v)
+{
+       unsigned long vendor_size = 0;
+       u64 target_pa = paddr + offset + sizeof(struct vendor_error_type_extension);
+
+       /*
+        * v->length is supplied by firmware.  Only subtract when it is
+        * larger than the header: the arithmetic is unsigned, so a short
+        * length would otherwise wrap around and request an enormous
+        * mapping.
+        */
+       if (v->length > sizeof(struct vendor_error_type_extension))
+               vendor_size = v->length - sizeof(struct vendor_error_type_extension);
+
+       if (vendor_size)
+               vendor_errors.data = acpi_os_map_memory(target_pa, vendor_size);
+
+       if (vendor_errors.data)
+               vendor_errors.size = vendor_size;
+}
+
+/*
+ * If @v5param advertises a vendor extension (non-zero vendor_extension
+ * offset), map its header at @paddr + offset, let
+ * get_oem_vendor_struct() pick up any trailing vendor data, and format
+ * the device identification into vendor_dev.  The header mapping is
+ * dropped again before returning.
+ */
+static void check_vendor_extension(u64 paddr,
+                                  struct set_error_type_with_address *v5param)
+{
+       struct vendor_error_type_extension *ext;
+       int ext_off = v5param->vendor_extension;
+       u32 sbdf;
+
+       if (ext_off == 0)
+               return;
+
+       ext = acpi_os_map_iomem(paddr + ext_off, sizeof(*ext));
+       if (!ext)
+               return;
+
+       get_oem_vendor_struct(paddr, ext_off, ext);
+
+       sbdf = ext->pcie_sbdf;
+       sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
+               sbdf >> 24, (sbdf >> 16) & 0xff,
+               (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
+               ext->vendor_id, ext->device_id, ext->rev_id);
+
+       acpi_os_unmap_iomem(ext, sizeof(*ext));
+}
+
+/*
+ * Locate and map the parameter block that accompanies error injection.
+ *
+ * The EINJ action entries are scanned for WRITE_REGISTER instructions
+ * targeting system memory that belong to SET_ERROR_TYPE (address kept in
+ * pa_v4) or SET_ERROR_TYPE_WITH_ADDRESS (address kept in pa_v5).  If
+ * several entries match, the last one wins.
+ *
+ * Returns a mapped struct set_error_type_with_address (and sets acpi5)
+ * when a v5 address was found and could be mapped; otherwise, if
+ * param_extension is set and a v4 address was found, a mapped
+ * struct einj_parameter; otherwise NULL.
+ */
+static void *einj_get_parameter_address(void)
+{
+       int i;
+       u64 pa_v4 = 0, pa_v5 = 0;
+       struct acpi_whea_header *entry;
+
+       entry = EINJ_TAB_ENTRY(einj_tab);
+       for (i = 0; i < einj_tab->entries; i++) {
+               if (entry->action == ACPI_EINJ_SET_ERROR_TYPE &&
+                   entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+                   entry->register_region.space_id ==
+                   ACPI_ADR_SPACE_SYSTEM_MEMORY)
+                       pa_v4 = get_unaligned(&entry->register_region.address);
+               if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
+                   entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+                   entry->register_region.space_id ==
+                   ACPI_ADR_SPACE_SYSTEM_MEMORY)
+                       pa_v5 = get_unaligned(&entry->register_region.address);
+               entry++;
+       }
+       /* Prefer the v5 block; fall through to v4 if it cannot be mapped */
+       if (pa_v5) {
+               struct set_error_type_with_address *v5param;
+
+               v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
+               if (v5param) {
+                       acpi5 = 1;
+                       check_vendor_extension(pa_v5, v5param);
+                       return v5param;
+               }
+       }
+       if (param_extension && pa_v4) {
+               struct einj_parameter *v4param;
+
+               v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
+               if (!v4param)
+                       return NULL;
+               /* Non-zero reserved fields: do not use this block */
+               if (v4param->reserved1 || v4param->reserved2) {
+                       acpi_os_unmap_iomem(v4param, sizeof(*v4param));
+                       return NULL;
+               }
+               return v4param;
+       }
+
+       return NULL;
+}
+
+/*
+ * Sanity-check the header of a trigger error action table.
+ *
+ * The table is accepted (0) only if header_size equals
+ * sizeof(struct acpi_einj_trigger), table_size lies between header_size
+ * and PAGE_SIZE, and entry_count matches the number of
+ * struct acpi_einj_entry that fit after the header.  Otherwise -EINVAL
+ * is returned.
+ */
+static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
+{
+       if (trigger_tab->header_size != sizeof(*trigger_tab) ||
+           trigger_tab->table_size > PAGE_SIZE ||
+           trigger_tab->table_size < trigger_tab->header_size)
+               return -EINVAL;
+
+       if ((trigger_tab->table_size - trigger_tab->header_size) /
+           sizeof(struct acpi_einj_entry) != trigger_tab->entry_count)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Find the first trigger table entry that is a TRIGGER_ERROR action,
+ * uses an instruction code no greater than
+ * ACPI_EINJ_WRITE_REGISTER_VALUE, targets system memory, and whose
+ * register address equals @param1 under the mask @param2.
+ *
+ * Returns a pointer to that entry's register region, or NULL if no
+ * entry matches.
+ */
+static struct acpi_generic_address *einj_get_trigger_parameter_region(
+       struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2)
+{
+       struct acpi_whea_header *entry;
+       int i;
+
+       entry = (struct acpi_whea_header *)
+               ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+
+       for (i = 0; i < trigger_tab->entry_count; i++, entry++) {
+               if (entry->action != ACPI_EINJ_TRIGGER_ERROR)
+                       continue;
+               if (entry->instruction > ACPI_EINJ_WRITE_REGISTER_VALUE)
+                       continue;
+               if (entry->register_region.space_id !=
+                   ACPI_ADR_SPACE_SYSTEM_MEMORY)
+                       continue;
+               if ((entry->register_region.address & param2) !=
+                   (param1 & param2))
+                       continue;
+
+               return &entry->register_region;
+       }
+
+       return NULL;
+}
+/*
+ * Execute instructions in trigger error action table
+ *
+ * @trigger_paddr: physical address of the trigger table
+ * @type:          error type being injected
+ * @param1/@param2: address and mask passed by the caller; for memory
+ *                 errors they identify a register region to leave out
+ *                 of the resources requested for the trigger table
+ *
+ * The table header is mapped and validated first; the table is then
+ * remapped at its full size, its resources are collected (minus those
+ * already owned by the EINJ table), requested, and the TRIGGER_ERROR
+ * action is run.
+ *
+ * Returns 0 on success (including an empty trigger table) or a negative
+ * errno.
+ */
+static int __einj_error_trigger(u64 trigger_paddr, u32 type,
+                               u64 param1, u64 param2)
+{
+       struct acpi_einj_trigger *trigger_tab = NULL;
+       struct apei_exec_context trigger_ctx;
+       struct apei_resources trigger_resources;
+       struct acpi_whea_header *trigger_entry;
+       struct resource *r;
+       u32 table_size;
+       int rc = -EIO;
+       struct acpi_generic_address *trigger_param_region = NULL;
+
+       r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
+                              "APEI EINJ Trigger Table");
+       if (!r) {
+               pr_err("Can not request [mem %#010llx-%#010llx] for Trigger table\n",
+                      (unsigned long long)trigger_paddr,
+                      (unsigned long long)trigger_paddr +
+                           sizeof(*trigger_tab) - 1);
+               goto out;
+       }
+       trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
+       if (!trigger_tab) {
+               pr_err("Failed to map trigger table!\n");
+               goto out_rel_header;
+       }
+       rc = einj_check_trigger_header(trigger_tab);
+       if (rc) {
+               pr_warn(FW_BUG "Invalid trigger error action table.\n");
+               goto out_rel_header;
+       }
+
+       /* No action structures in the TRIGGER_ERROR table, nothing to do */
+       if (!trigger_tab->entry_count)
+               goto out_rel_header;
+
+       rc = -EIO;
+       table_size = trigger_tab->table_size;
+       r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
+                              table_size - sizeof(*trigger_tab),
+                              "APEI EINJ Trigger Table");
+       if (!r) {
+               pr_err("Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n",
+                      (unsigned long long)trigger_paddr + sizeof(*trigger_tab),
+                      (unsigned long long)trigger_paddr + table_size - 1);
+               goto out_rel_header;
+       }
+       /* Replace the header-only mapping with one covering the whole table */
+       iounmap(trigger_tab);
+       trigger_tab = ioremap_cache(trigger_paddr, table_size);
+       if (!trigger_tab) {
+               pr_err("Failed to map trigger table!\n");
+               goto out_rel_entry;
+       }
+       trigger_entry = (struct acpi_whea_header *)
+               ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+       apei_resources_init(&trigger_resources);
+       apei_exec_ctx_init(&trigger_ctx, einj_ins_type,
+                          ARRAY_SIZE(einj_ins_type),
+                          trigger_entry, trigger_tab->entry_count);
+       rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources);
+       if (rc)
+               goto out_fini;
+       rc = apei_resources_sub(&trigger_resources, &einj_resources);
+       if (rc)
+               goto out_fini;
+       /*
+        * Some firmware will access target address specified in
+        * param1 to trigger the error when injecting memory error.
+        * This will cause resource conflict with regular memory.  So
+        * remove it from trigger table resources.
+        */
+       if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) {
+               struct apei_resources addr_resources;
+
+               apei_resources_init(&addr_resources);
+               trigger_param_region = einj_get_trigger_parameter_region(
+                       trigger_tab, param1, param2);
+               if (trigger_param_region) {
+                       rc = apei_resources_add(&addr_resources,
+                               trigger_param_region->address,
+                               trigger_param_region->bit_width/8, true);
+                       /*
+                        * Do not bail out from here on failure: fall
+                        * through so that addr_resources is always
+                        * finalized below before the error is handled.
+                        */
+                       if (!rc)
+                               rc = apei_resources_sub(&trigger_resources,
+                                               &addr_resources);
+               }
+               apei_resources_fini(&addr_resources);
+               if (rc)
+                       goto out_fini;
+       }
+       rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger");
+       if (rc)
+               goto out_fini;
+       rc = apei_exec_pre_map_gars(&trigger_ctx);
+       if (rc)
+               goto out_release;
+
+       rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR);
+
+       apei_exec_post_unmap_gars(&trigger_ctx);
+out_release:
+       apei_resources_release(&trigger_resources);
+out_fini:
+       apei_resources_fini(&trigger_resources);
+out_rel_entry:
+       release_mem_region(trigger_paddr + sizeof(*trigger_tab),
+                          table_size - sizeof(*trigger_tab));
+out_rel_header:
+       release_mem_region(trigger_paddr, sizeof(*trigger_tab));
+out:
+       if (trigger_tab)
+               iounmap(trigger_tab);
+
+       return rc;
+}
+
+/*
+ * Run the EINJ injection sequence for @type.  Takes no lock itself;
+ * einj_error_inject() and einj_cxl_rch_error_inject() call it with
+ * einj_mutex held.
+ *
+ * Sequence: BEGIN_OPERATION (optional), program the error type and its
+ * parameters, EXECUTE_OPERATION, poll CHECK_BUSY_STATUS until the busy
+ * bit clears, read GET_COMMAND_STATUS, fetch the trigger table address
+ * and (unless notrigger is set) run the trigger table, then
+ * END_OPERATION (optional).
+ *
+ * Returns 0 on success, -EIO if firmware stays busy past
+ * FIRMWARE_TIMEOUT, -EBUSY / -EINVAL for the EINJ_STATUS_FAIL /
+ * EINJ_STATUS_INVAL command status, or the error of the failing step.
+ * Note that every error return below leaves before END_OPERATION runs.
+ */
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+                              u64 param3, u64 param4)
+{
+       struct apei_exec_context ctx;
+       u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
+       int rc;
+
+       einj_exec_ctx_init(&ctx);
+
+       rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+       if (rc)
+               return rc;
+       apei_exec_ctx_set_input(&ctx, type);
+       if (acpi5) {
+               struct set_error_type_with_address *v5param = einj_param;
+
+               v5param->type = type;
+               if (type & ACPI5_VENDOR_BIT) {
+                       /* Vendor type: vendor_flags selects which field param1/param2 go to */
+                       switch (vendor_flags) {
+                       case SETWA_FLAGS_APICID:
+                               v5param->apicid = param1;
+                               break;
+                       case SETWA_FLAGS_MEM:
+                               v5param->memory_address = param1;
+                               v5param->memory_address_range = param2;
+                               break;
+                       case SETWA_FLAGS_PCIE_SBDF:
+                               v5param->pcie_sbdf = param1;
+                               break;
+                       }
+                       v5param->flags = vendor_flags;
+               } else if (flags) {
+                       /* Caller-supplied flags: pass all four parameters through */
+                       v5param->flags = flags;
+                       v5param->memory_address = param1;
+                       v5param->memory_address_range = param2;
+                       v5param->apicid = param3;
+                       v5param->pcie_sbdf = param4;
+               } else {
+                       /* No flags given: derive them from the error type */
+                       switch (type) {
+                       case ACPI_EINJ_PROCESSOR_CORRECTABLE:
+                       case ACPI_EINJ_PROCESSOR_UNCORRECTABLE:
+                       case ACPI_EINJ_PROCESSOR_FATAL:
+                               v5param->apicid = param1;
+                               v5param->flags = SETWA_FLAGS_APICID;
+                               break;
+                       case ACPI_EINJ_MEMORY_CORRECTABLE:
+                       case ACPI_EINJ_MEMORY_UNCORRECTABLE:
+                       case ACPI_EINJ_MEMORY_FATAL:
+                               v5param->memory_address = param1;
+                               v5param->memory_address_range = param2;
+                               v5param->flags = SETWA_FLAGS_MEM;
+                               break;
+                       case ACPI_EINJ_PCIX_CORRECTABLE:
+                       case ACPI_EINJ_PCIX_UNCORRECTABLE:
+                       case ACPI_EINJ_PCIX_FATAL:
+                               v5param->pcie_sbdf = param1;
+                               v5param->flags = SETWA_FLAGS_PCIE_SBDF;
+                               break;
+                       }
+               }
+       } else {
+               rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
+               if (rc)
+                       return rc;
+               /* einj_param is only set here when the v4 parameter extension is in use */
+               if (einj_param) {
+                       struct einj_parameter *v4param = einj_param;
+
+                       v4param->param1 = param1;
+                       v4param->param2 = param2;
+               }
+       }
+       rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
+       if (rc)
+               return rc;
+       /* Poll until firmware clears the busy bit or the timeout budget runs out */
+       for (;;) {
+               rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS);
+               if (rc)
+                       return rc;
+               val = apei_exec_ctx_get_output(&ctx);
+               if (!(val & EINJ_OP_BUSY))
+                       break;
+               if (einj_timedout(&timeout))
+                       return -EIO;
+       }
+       rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS);
+       if (rc)
+               return rc;
+       val = apei_exec_ctx_get_output(&ctx);
+       if (val == EINJ_STATUS_FAIL)
+               return -EBUSY;
+       else if (val == EINJ_STATUS_INVAL)
+               return -EINVAL;
+
+       /*
+        * The error is injected into the platform successfully, then it needs
+        * to trigger the error.
+        */
+       rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE);
+       if (rc)
+               return rc;
+       trigger_paddr = apei_exec_ctx_get_output(&ctx);
+       if (notrigger == 0) {
+               rc = __einj_error_trigger(trigger_paddr, type, param1, param2);
+               if (rc)
+                       return rc;
+       }
+       rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
+
+       return rc;
+}
+
+/*
+ * Inject the specified hardware error.
+ *
+ * Validates the request and then runs __einj_error_inject() under
+ * einj_mutex.
+ *
+ * Returns -EINVAL for unknown bits in @flags, for a CXL error type
+ * combined with SETWA_FLAGS_MEM, or for a memory target that fails the
+ * mask/region checks; -EADDRINUSE if the target is the zero page;
+ * otherwise the result of __einj_error_inject().
+ */
+int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
+                     u64 param4)
+{
+       const u32 known_flags = SETWA_FLAGS_APICID | SETWA_FLAGS_MEM |
+                               SETWA_FLAGS_PCIE_SBDF;
+       bool mem_target;
+       u64 start, len;
+       int ret;
+
+       /* If user manually set "flags", make sure it is legal */
+       if (flags & ~known_flags)
+               return -EINVAL;
+
+       /*
+        * Extra sanity checks apply to memory errors only, and only when
+        * param1/param2 are actually handed to firmware.  Everything else
+        * goes straight to injection.
+        */
+       if (!param_extension && !acpi5)
+               mem_target = false;
+       else if (type & ACPI5_VENDOR_BIT)
+               mem_target = (vendor_flags == SETWA_FLAGS_MEM);
+       else
+               mem_target = (type & MEM_ERROR_MASK) ||
+                            (flags & SETWA_FLAGS_MEM);
+
+       if (mem_target) {
+               /*
+                * Injections targeting a CXL 1.0/1.1 port have to be
+                * injected via the einj_cxl_rch_error_inject() path as
+                * that does the proper validation of the given RCRB base
+                * (MMIO) address.
+                */
+               if (einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM))
+                       return -EINVAL;
+
+               /*
+                * Disallow crazy address masks that give BIOS leeway to
+                * pick injection address almost anywhere. Insist on page
+                * or better granularity and that target address is normal
+                * RAM or NVDIMM.
+                */
+               start = param1 & param2;
+               len = ~param2 + 1;
+
+               if ((param2 & PAGE_MASK) != PAGE_MASK)
+                       return -EINVAL;
+
+               if (region_intersects(start, len, IORESOURCE_SYSTEM_RAM,
+                                     IORES_DESC_NONE) != REGION_INTERSECTS &&
+                   region_intersects(start, len, IORESOURCE_MEM,
+                                     IORES_DESC_PERSISTENT_MEMORY) != REGION_INTERSECTS &&
+                   region_intersects(start, len, IORESOURCE_MEM,
+                                     IORES_DESC_SOFT_RESERVED) != REGION_INTERSECTS &&
+                   !arch_is_platform_page(start))
+                       return -EINVAL;
+
+               if (is_zero_pfn(start >> PAGE_SHIFT))
+                       return -EADDRINUSE;
+       }
+
+       mutex_lock(&einj_mutex);
+       ret = __einj_error_inject(type, flags, param1, param2, param3, param4);
+       mutex_unlock(&einj_mutex);
+
+       return ret;
+}
+
+/*
+ * Inject a CXL error using the memory-address form of the request.
+ * Anything that is not a CXL error type with SETWA_FLAGS_MEM set in @flags
+ * is rejected with -EINVAL before the injection lock is taken.
+ */
+int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+                             u64 param3, u64 param4)
+{
+       int ret;
+
+       if (!einj_is_cxl_error_type(type) || !(flags & SETWA_FLAGS_MEM))
+               return -EINVAL;
+
+       mutex_lock(&einj_mutex);
+       ret = __einj_error_inject(type, flags, param1, param2, param3, param4);
+       mutex_unlock(&einj_mutex);
+
+       return ret;
+}
+
+/*
+ * Injection request state. The values are set through the debugfs files
+ * created in einj_probe() and consumed by error_inject_set().
+ */
+static u32 error_type;
+static u32 error_flags;
+static u64 error_param1;
+static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
+static struct dentry *einj_debug_dir;
+
+/* Error type bit -> human readable name, as printed by available_error_type */
+static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
+       { .mask = BIT(0),  .str = "Processor Correctable" },
+       { .mask = BIT(1),  .str = "Processor Uncorrectable non-fatal" },
+       { .mask = BIT(2),  .str = "Processor Uncorrectable fatal" },
+       { .mask = BIT(3),  .str = "Memory Correctable" },
+       { .mask = BIT(4),  .str = "Memory Uncorrectable non-fatal" },
+       { .mask = BIT(5),  .str = "Memory Uncorrectable fatal" },
+       { .mask = BIT(6),  .str = "PCI Express Correctable" },
+       { .mask = BIT(7),  .str = "PCI Express Uncorrectable non-fatal" },
+       { .mask = BIT(8),  .str = "PCI Express Uncorrectable fatal" },
+       { .mask = BIT(9),  .str = "Platform Correctable" },
+       { .mask = BIT(10), .str = "Platform Uncorrectable non-fatal" },
+       { .mask = BIT(11), .str = "Platform Uncorrectable fatal" },
+       { .mask = BIT(31), .str = "Vendor Defined Error Types" },
+};
+
+/*
+ * seq_file show handler behind available_error_type_fops.
+ *
+ * Queries the platform for its supported error types and prints one
+ * "0x<mask>\t<name>" line for every einj_error_type_string[] entry whose bit
+ * is set. Returns 0, or the error from einj_get_available_error_type().
+ */
+static int available_error_type_show(struct seq_file *m, void *v)
+{
+       /* Named so it does not shadow the file-scope 'error_type' selection */
+       u32 available = 0;
+       int rc;
+
+       rc = einj_get_available_error_type(&available);
+       if (rc)
+               return rc;
+
+       for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++) {
+               if (!(available & einj_error_type_string[pos].mask))
+                       continue;
+
+               seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
+                          einj_error_type_string[pos].str);
+       }
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(available_error_type);
+
+static int error_type_get(void *data, u64 *val)
+{
+       *val = error_type;
+
+       return 0;
+}
+
+/*
+ * Return true when @type has at least one CXL_ERROR_MASK bit set and is not
+ * flagged as vendor defined.
+ */
+bool einj_is_cxl_error_type(u64 type)
+{
+       if (type & ACPI5_VENDOR_BIT)
+               return false;
+
+       return (type & CXL_ERROR_MASK) != 0;
+}
+
+/*
+ * Check that @type is a usable error type.
+ *
+ * Returns 0 when valid and -EINVAL when bits above 31 are set, more than one
+ * non-vendor bit is set, or a non-vendor type is not offered by the platform.
+ * A failure to query the platform's types is passed through.
+ */
+int einj_validate_error_type(u64 type)
+{
+       u32 platform_types = 0;
+       u32 type_bits;
+       int rc;
+
+       /* Only low 32 bits for error type are valid */
+       if (type & GENMASK_ULL(63, 32))
+               return -EINVAL;
+
+       /* Only one error type can be specified */
+       type_bits = type & GENMASK(30, 0);
+       if (type_bits & (type_bits - 1))
+               return -EINVAL;
+
+       /*
+        * Vendor defined types have 0x80000000 bit set, and
+        * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
+        */
+       if (type & ACPI5_VENDOR_BIT)
+               return 0;
+
+       rc = einj_get_available_error_type(&platform_types);
+       if (rc)
+               return rc;
+
+       return (type & platform_types) ? 0 : -EINVAL;
+}
+
+/*
+ * debugfs setter: remember @val as the error type to inject, but only once
+ * einj_validate_error_type() has accepted it.
+ */
+static int error_type_set(void *data, u64 val)
+{
+       int rc = einj_validate_error_type(val);
+
+       if (!rc)
+               error_type = val;
+
+       return rc;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(error_type_fops, error_type_get, error_type_set,
+                        "0x%llx\n");
+
+/*
+ * debugfs setter that fires the injection. The written value is ignored; the
+ * request is built from error_type, error_flags and error_param1..4.
+ */
+static int error_inject_set(void *data, u64 val)
+{
+       /* Nothing to inject until an error type has been selected */
+       if (error_type == 0)
+               return -EINVAL;
+
+       return einj_error_inject(error_type, error_flags,
+                                error_param1, error_param2,
+                                error_param3, error_param4);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(error_inject_fops, NULL, error_inject_set, "%llu\n");
+
+/*
+ * Sanity check the size fields of the EINJ table.
+ * Returns 0 when they are consistent with each other, -EINVAL otherwise.
+ */
+static int einj_check_table(struct acpi_table_einj *einj_tab)
+{
+       const size_t tab_size = sizeof(struct acpi_table_einj);
+       const size_t hdr_size = sizeof(einj_tab->header);
+
+       /* header_length is accepted both with and without the table header */
+       if (einj_tab->header_length != tab_size - hdr_size &&
+           einj_tab->header_length != tab_size)
+               return -EINVAL;
+
+       /* The table must be large enough to hold the fixed part */
+       if (einj_tab->header.length < tab_size)
+               return -EINVAL;
+
+       /* The entry count must match the space left after the fixed part */
+       if (einj_tab->entries !=
+           (einj_tab->header.length - tab_size) / sizeof(struct acpi_einj_entry))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Probe routine for the "acpi-einj" platform driver.
+ *
+ * Looks up and validates the ACPI EINJ table, creates the debugfs interface,
+ * then collects, requests and pre-maps the resources used by the EINJ
+ * actions. On failure everything set up so far is undone.
+ *
+ * Returns 0 on success, -ENODEV when ACPI is disabled or there is no EINJ
+ * table, or another negative errno.
+ */
+static int __init einj_probe(struct platform_device *pdev)
+{
+       int rc;
+       acpi_status status;
+       struct apei_exec_context ctx;
+
+       if (acpi_disabled) {
+               pr_debug("ACPI disabled.\n");
+               return -ENODEV;
+       }
+
+       status = acpi_get_table(ACPI_SIG_EINJ, 0,
+                               (struct acpi_table_header **)&einj_tab);
+       if (status == AE_NOT_FOUND) {
+               pr_debug("EINJ table not found.\n");
+               return -ENODEV;
+       } else if (ACPI_FAILURE(status)) {
+               pr_err("Failed to get EINJ table: %s\n",
+                               acpi_format_exception(status));
+               return -EINVAL;
+       }
+
+       rc = einj_check_table(einj_tab);
+       if (rc) {
+               pr_warn(FW_BUG "Invalid EINJ table.\n");
+               goto err_put_table;
+       }
+
+       /* The return values of the debugfs creation calls are not checked */
+       einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
+
+       debugfs_create_file("available_error_type", 0400, einj_debug_dir,
+                           NULL, &available_error_type_fops);
+       debugfs_create_file_unsafe("error_type", 0600, einj_debug_dir,
+                                  NULL, &error_type_fops);
+       debugfs_create_file_unsafe("error_inject", 0200, einj_debug_dir,
+                                  NULL, &error_inject_fops);
+
+       apei_resources_init(&einj_resources);
+       einj_exec_ctx_init(&ctx);
+       rc = apei_exec_collect_resources(&ctx, &einj_resources);
+       if (rc) {
+               pr_err("Error collecting EINJ resources.\n");
+               goto err_fini;
+       }
+
+       rc = apei_resources_request(&einj_resources, "APEI EINJ");
+       if (rc) {
+               pr_err("Error requesting memory/port resources.\n");
+               goto err_fini;
+       }
+
+       rc = apei_exec_pre_map_gars(&ctx);
+       if (rc) {
+               pr_err("Error pre-mapping GARs.\n");
+               goto err_release;
+       }
+
+       /* The parameter files only exist when a parameter area was found */
+       einj_param = einj_get_parameter_address();
+       if ((param_extension || acpi5) && einj_param) {
+               debugfs_create_x32("flags", 0600, einj_debug_dir,
+                                  &error_flags);
+               debugfs_create_x64("param1", 0600, einj_debug_dir,
+                                  &error_param1);
+               debugfs_create_x64("param2", 0600, einj_debug_dir,
+                                  &error_param2);
+               debugfs_create_x64("param3", 0600, einj_debug_dir,
+                                  &error_param3);
+               debugfs_create_x64("param4", 0600, einj_debug_dir,
+                                  &error_param4);
+               debugfs_create_x32("notrigger", 0600,
+                                  einj_debug_dir, &notrigger);
+       }
+
+       if (vendor_dev[0]) {
+               vendor_blob.data = vendor_dev;
+               vendor_blob.size = strlen(vendor_dev);
+               debugfs_create_blob("vendor", 0400, einj_debug_dir,
+                                   &vendor_blob);
+               debugfs_create_x32("vendor_flags", 0600,
+                                  einj_debug_dir, &vendor_flags);
+       }
+
+       if (vendor_errors.size)
+               debugfs_create_blob("oem_error", 0600, einj_debug_dir,
+                                   &vendor_errors);
+
+       pr_info("Error INJection is initialized.\n");
+
+       return 0;
+
+err_release:
+       apei_resources_release(&einj_resources);
+err_fini:
+       apei_resources_fini(&einj_resources);
+       debugfs_remove_recursive(einj_debug_dir);
+err_put_table:
+       acpi_put_table((struct acpi_table_header *)einj_tab);
+
+       return rc;
+}
+
+/*
+ * Undo einj_probe(): unmap the parameter area and any vendor error data,
+ * unmap the GARs, give back the EINJ resources, remove the debugfs
+ * directory and drop the reference on the EINJ table.
+ *
+ * Deliberately not marked __exit: einj_driver keeps a pointer to this
+ * function, so it must not be placed in a section that is discarded when
+ * the code is built in.
+ */
+static void einj_remove(struct platform_device *pdev)
+{
+       struct apei_exec_context ctx;
+
+       if (einj_param) {
+               /* The mapping size depends on which parameter layout is in use */
+               acpi_size size = (acpi5) ?
+                       sizeof(struct set_error_type_with_address) :
+                       sizeof(struct einj_parameter);
+
+               acpi_os_unmap_iomem(einj_param, size);
+               if (vendor_errors.size)
+                       acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
+       }
+       einj_exec_ctx_init(&ctx);
+       apei_exec_post_unmap_gars(&ctx);
+       apei_resources_release(&einj_resources);
+       apei_resources_fini(&einj_resources);
+       debugfs_remove_recursive(einj_debug_dir);
+       acpi_put_table((struct acpi_table_header *)einj_tab);
+}
+
+static struct platform_device *einj_dev; /* created by einj_init(), torn down by einj_exit() */
+static struct platform_driver einj_driver = { /* probe routine is handed to platform_driver_probe() in einj_init() */
+       .remove_new = einj_remove,
+       .driver = {
+               .name = "acpi-einj", /* same name einj_init() gives the platform device */
+       },
+};
+
+/*
+ * Module init: register the "acpi-einj" platform device and try to probe it.
+ *
+ * Only a failure to register the device is reported to the caller. The probe
+ * result is recorded in einj_initialized and 0 is returned either way.
+ */
+static int __init einj_init(void)
+{
+       const struct platform_device_info info = {
+               .name = "acpi-einj",
+               .id = -1,
+       };
+
+       einj_dev = platform_device_register_full(&info);
+       if (IS_ERR(einj_dev))
+               return PTR_ERR(einj_dev);
+
+       /* A failed probe is not an error here; it only leaves EINJ unusable */
+       einj_initialized = !platform_driver_probe(&einj_driver, einj_probe);
+
+       return 0;
+}
+
+/*
+ * Module exit: unregister the driver if it was bound by einj_init(), then
+ * get rid of the platform device.
+ */
+static void __exit einj_exit(void)
+{
+       if (einj_initialized)
+               platform_driver_unregister(&einj_driver);
+
+       /*
+        * Pairs with platform_device_register_full() in einj_init().
+        * platform_device_del() alone would leave the device's final
+        * reference held and leak it on every module unload.
+        */
+       platform_device_unregister(einj_dev);
+}
+
+module_init(einj_init);
+module_exit(einj_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error INJection support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c

new file mode 100644 (file)

index 0000000..8b8be0c
--- /dev/null
+++ b/drivers/acpi/apei/einj-cxl.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CXL Error INJection support. Used by CXL core to inject
+ * protocol errors into CXL ports.
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Ben Cheatham <benjamin.cheatham@amd.com>
+ */
+#include <linux/einj-cxl.h>
+#include <linux/seq_file.h>
+#include <linux/pci.h>
+
+#include "apei-internal.h"
+
+/* Defined in einj-core.c */
+extern bool einj_initialized;
+
+static struct { u32 mask; const char *str; } const einj_cxl_error_type_string[] = {
+       { ACPI_EINJ_CXL_CACHE_CORRECTABLE, "CXL.cache Protocol Correctable" },
+       { ACPI_EINJ_CXL_CACHE_UNCORRECTABLE, "CXL.cache Protocol Uncorrectable non-fatal" },
+       { ACPI_EINJ_CXL_CACHE_FATAL, "CXL.cache Protocol Uncorrectable fatal" },
+       { ACPI_EINJ_CXL_MEM_CORRECTABLE, "CXL.mem Protocol Correctable" },
+       { ACPI_EINJ_CXL_MEM_UNCORRECTABLE, "CXL.mem Protocol Uncorrectable non-fatal" },
+       { ACPI_EINJ_CXL_MEM_FATAL, "CXL.mem Protocol Uncorrectable fatal" },
+};
+
+/*
+ * seq_file show handler listing the CXL protocol error types the platform
+ * reports as available, one "0x<mask>\t<name>" line per type.
+ *
+ * Returns 0, or the error from einj_get_available_error_type().
+ */
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v)
+{
+       u32 available_error_type = 0;
+       int rc;
+
+       rc = einj_get_available_error_type(&available_error_type);
+       if (rc)
+               return rc;
+
+       for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) {
+               /*
+                * Test the same mask that is printed, so the output cannot
+                * drift from the check if the table is ever reordered.
+                */
+               u32 mask = einj_cxl_error_type_string[pos].mask;
+
+               if (available_error_type & mask)
+                       seq_printf(m, "0x%08x\t%s\n", mask,
+                                  einj_cxl_error_type_string[pos].str);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL);
+
+/*
+ * Build the segment/bus/device/function value for @dport_dev in the layout
+ * the EINJ parameter area uses:
+ *
+ *   31     24 23    16 15      11 10       8 7        0
+ *   | segment |  bus  |  device  | function | reserved |
+ *
+ * Stores the result in @sbdf and returns 0, or returns -ENODEV when no host
+ * bridge can be found for the device's bus.
+ */
+static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf)
+{
+       struct pci_bus *pbus;
+       struct pci_host_bridge *bridge;
+       u64 seg = 0, bus, devfn;
+
+       pbus = dport_dev->bus;
+       bridge = pci_find_host_bridge(pbus);
+
+       if (!bridge)
+               return -ENODEV;
+
+       /* Segment stays 0 when the bridge has no domain number assigned */
+       if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
+               seg = bridge->domain_nr;
+
+       bus = pbus->number;
+       devfn = dport_dev->devfn;
+
+       /* devfn goes in bits 15:8; bits 7:0 are reserved and stay zero */
+       *sbdf = (seg << 24) | (bus << 16) | (devfn << 8);
+
+       return 0;
+}
+
+/*
+ * Inject CXL error @type using @rcrb as the target address.
+ *
+ * Returns -EINVAL when @type is not a CXL error type, the error from
+ * einj_validate_error_type() when validation fails, or otherwise the result
+ * of einj_cxl_rch_error_inject().
+ */
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
+{
+       int rc = -EINVAL;
+
+       /* Only CXL error types can be specified */
+       if (einj_is_cxl_error_type(type))
+               rc = einj_validate_error_type(type);
+       if (rc)
+               return rc;
+
+       /* @rcrb is passed as param1 with an all-ones param2 */
+       return einj_cxl_rch_error_inject(type, 0x2, rcrb, GENMASK_ULL(63, 0),
+                                        0, 0);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL);
+
+/*
+ * Inject CXL error @type into the port behind PCI device @dport.
+ *
+ * Returns -EINVAL when @type is not a CXL error type, the error from
+ * einj_validate_error_type() or cxl_dport_get_sbdf() when either fails, or
+ * otherwise the result of einj_error_inject().
+ */
+int einj_cxl_inject_error(struct pci_dev *dport, u64 type)
+{
+       u64 sbdf = 0;
+       int rc = -EINVAL;
+
+       /* Only CXL error types can be specified */
+       if (einj_is_cxl_error_type(type))
+               rc = einj_validate_error_type(type);
+       if (rc)
+               return rc;
+
+       rc = cxl_dport_get_sbdf(dport, &sbdf);
+       if (rc)
+               return rc;
+
+       /* The port's SBDF is passed as param4; params 1-3 are unused */
+       return einj_error_inject(type, 0x4, 0, 0, 0, sbdf);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL);
+
+/* Report the einj_initialized flag kept by einj-core.c */
+bool einj_cxl_is_initialized(void)
+{
+       bool ready = einj_initialized;
+
+       return ready;
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_is_initialized, CXL);
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c

deleted file mode 100644 (file)

index 89fb933..0000000
--- a/drivers/acpi/apei/einj.c
+++ /dev/null
@@ -1,834 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * APEI Error INJection support
- *
- * EINJ provides a hardware error injection mechanism, this is useful
- * for debugging and testing of other APEI and RAS features.
- *
- * For more information about EINJ, please refer to ACPI Specification
- * version 4.0, section 17.5.
- *
- * Copyright 2009-2010 Intel Corp.
- *   Author: Huang Ying <ying.huang@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/nmi.h>
-#include <linux/delay.h>
-#include <linux/mm.h>
-#include <asm/unaligned.h>
-
-#include "apei-internal.h"
-
-#undef pr_fmt
-#define pr_fmt(fmt) "EINJ: " fmt
-
-#define SLEEP_UNIT_MIN         1000                    /* 1ms */
-#define SLEEP_UNIT_MAX         5000                    /* 5ms */
-/* Firmware should respond within 1 seconds */
-#define FIRMWARE_TIMEOUT       (1 * USEC_PER_SEC)
-#define ACPI5_VENDOR_BIT       BIT(31)
-#define MEM_ERROR_MASK         (ACPI_EINJ_MEMORY_CORRECTABLE | \
-                               ACPI_EINJ_MEMORY_UNCORRECTABLE | \
-                               ACPI_EINJ_MEMORY_FATAL)
-
-/*
- * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
- */
-static int acpi5;
-
-struct set_error_type_with_address {
-       u32     type;
-       u32     vendor_extension;
-       u32     flags;
-       u32     apicid;
-       u64     memory_address;
-       u64     memory_address_range;
-       u32     pcie_sbdf;
-};
-enum {
-       SETWA_FLAGS_APICID = 1,
-       SETWA_FLAGS_MEM = 2,
-       SETWA_FLAGS_PCIE_SBDF = 4,
-};
-
-/*
- * Vendor extensions for platform specific operations
- */
-struct vendor_error_type_extension {
-       u32     length;
-       u32     pcie_sbdf;
-       u16     vendor_id;
-       u16     device_id;
-       u8      rev_id;
-       u8      reserved[3];
-};
-
-static u32 notrigger;
-
-static u32 vendor_flags;
-static struct debugfs_blob_wrapper vendor_blob;
-static struct debugfs_blob_wrapper vendor_errors;
-static char vendor_dev[64];
-
-/*
- * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
- * EINJ table through an unpublished extension. Use with caution as
- * most will ignore the parameter and make their own choice of address
- * for error injection.  This extension is used only if
- * param_extension module parameter is specified.
- */
-struct einj_parameter {
-       u64 type;
-       u64 reserved1;
-       u64 reserved2;
-       u64 param1;
-       u64 param2;
-};
-
-#define EINJ_OP_BUSY                   0x1
-#define EINJ_STATUS_SUCCESS            0x0
-#define EINJ_STATUS_FAIL               0x1
-#define EINJ_STATUS_INVAL              0x2
-
-#define EINJ_TAB_ENTRY(tab)                                            \
-       ((struct acpi_whea_header *)((char *)(tab) +                    \
-                                   sizeof(struct acpi_table_einj)))
-
-static bool param_extension;
-module_param(param_extension, bool, 0);
-
-static struct acpi_table_einj *einj_tab;
-
-static struct apei_resources einj_resources;
-
-static struct apei_exec_ins_type einj_ins_type[] = {
-       [ACPI_EINJ_READ_REGISTER] = {
-               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
-               .run   = apei_exec_read_register,
-       },
-       [ACPI_EINJ_READ_REGISTER_VALUE] = {
-               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
-               .run   = apei_exec_read_register_value,
-       },
-       [ACPI_EINJ_WRITE_REGISTER] = {
-               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
-               .run   = apei_exec_write_register,
-       },
-       [ACPI_EINJ_WRITE_REGISTER_VALUE] = {
-               .flags = APEI_EXEC_INS_ACCESS_REGISTER,
-               .run   = apei_exec_write_register_value,
-       },
-       [ACPI_EINJ_NOOP] = {
-               .flags = 0,
-               .run   = apei_exec_noop,
-       },
-};
-
-/*
- * Prevent EINJ interpreter to run simultaneously, because the
- * corresponding firmware implementation may not work properly when
- * invoked simultaneously.
- */
-static DEFINE_MUTEX(einj_mutex);
-
-static void *einj_param;
-
-static void einj_exec_ctx_init(struct apei_exec_context *ctx)
-{
-       apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
-                          EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
-}
-
-static int __einj_get_available_error_type(u32 *type)
-{
-       struct apei_exec_context ctx;
-       int rc;
-
-       einj_exec_ctx_init(&ctx);
-       rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
-       if (rc)
-               return rc;
-       *type = apei_exec_ctx_get_output(&ctx);
-
-       return 0;
-}
-
-/* Get error injection capabilities of the platform */
-static int einj_get_available_error_type(u32 *type)
-{
-       int rc;
-
-       mutex_lock(&einj_mutex);
-       rc = __einj_get_available_error_type(type);
-       mutex_unlock(&einj_mutex);
-
-       return rc;
-}
-
-static int einj_timedout(u64 *t)
-{
-       if ((s64)*t < SLEEP_UNIT_MIN) {
-               pr_warn(FW_WARN "Firmware does not respond in time\n");
-               return 1;
-       }
-       *t -= SLEEP_UNIT_MIN;
-       usleep_range(SLEEP_UNIT_MIN, SLEEP_UNIT_MAX);
-
-       return 0;
-}
-
-static void get_oem_vendor_struct(u64 paddr, int offset,
-                                 struct vendor_error_type_extension *v)
-{
-       unsigned long vendor_size;
-       u64 target_pa = paddr + offset + sizeof(struct vendor_error_type_extension);
-
-       vendor_size = v->length - sizeof(struct vendor_error_type_extension);
-
-       if (vendor_size)
-               vendor_errors.data = acpi_os_map_memory(target_pa, vendor_size);
-
-       if (vendor_errors.data)
-               vendor_errors.size = vendor_size;
-}
-
-static void check_vendor_extension(u64 paddr,
-                                  struct set_error_type_with_address *v5param)
-{
-       int     offset = v5param->vendor_extension;
-       struct  vendor_error_type_extension *v;
-       u32     sbdf;
-
-       if (!offset)
-               return;
-       v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
-       if (!v)
-               return;
-       get_oem_vendor_struct(paddr, offset, v);
-       sbdf = v->pcie_sbdf;
-       sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
-               sbdf >> 24, (sbdf >> 16) & 0xff,
-               (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
-                v->vendor_id, v->device_id, v->rev_id);
-       acpi_os_unmap_iomem(v, sizeof(*v));
-}
-
-static void *einj_get_parameter_address(void)
-{
-       int i;
-       u64 pa_v4 = 0, pa_v5 = 0;
-       struct acpi_whea_header *entry;
-
-       entry = EINJ_TAB_ENTRY(einj_tab);
-       for (i = 0; i < einj_tab->entries; i++) {
-               if (entry->action == ACPI_EINJ_SET_ERROR_TYPE &&
-                   entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
-                   entry->register_region.space_id ==
-                   ACPI_ADR_SPACE_SYSTEM_MEMORY)
-                       pa_v4 = get_unaligned(&entry->register_region.address);
-               if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
-                   entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
-                   entry->register_region.space_id ==
-                   ACPI_ADR_SPACE_SYSTEM_MEMORY)
-                       pa_v5 = get_unaligned(&entry->register_region.address);
-               entry++;
-       }
-       if (pa_v5) {
-               struct set_error_type_with_address *v5param;
-
-               v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
-               if (v5param) {
-                       acpi5 = 1;
-                       check_vendor_extension(pa_v5, v5param);
-                       return v5param;
-               }
-       }
-       if (param_extension && pa_v4) {
-               struct einj_parameter *v4param;
-
-               v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
-               if (!v4param)
-                       return NULL;
-               if (v4param->reserved1 || v4param->reserved2) {
-                       acpi_os_unmap_iomem(v4param, sizeof(*v4param));
-                       return NULL;
-               }
-               return v4param;
-       }
-
-       return NULL;
-}
-
-/* do sanity check to trigger table */
-static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
-{
-       if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger))
-               return -EINVAL;
-       if (trigger_tab->table_size > PAGE_SIZE ||
-           trigger_tab->table_size < trigger_tab->header_size)
-               return -EINVAL;
-       if (trigger_tab->entry_count !=
-           (trigger_tab->table_size - trigger_tab->header_size) /
-           sizeof(struct acpi_einj_entry))
-               return -EINVAL;
-
-       return 0;
-}
-
-static struct acpi_generic_address *einj_get_trigger_parameter_region(
-       struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2)
-{
-       int i;
-       struct acpi_whea_header *entry;
-
-       entry = (struct acpi_whea_header *)
-               ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
-       for (i = 0; i < trigger_tab->entry_count; i++) {
-               if (entry->action == ACPI_EINJ_TRIGGER_ERROR &&
-               entry->instruction <= ACPI_EINJ_WRITE_REGISTER_VALUE &&
-               entry->register_region.space_id ==
-                       ACPI_ADR_SPACE_SYSTEM_MEMORY &&
-               (entry->register_region.address & param2) == (param1 & param2))
-                       return &entry->register_region;
-               entry++;
-       }
-
-       return NULL;
-}
-/* Execute instructions in trigger error action table */
-static int __einj_error_trigger(u64 trigger_paddr, u32 type,
-                               u64 param1, u64 param2)
-{
-       struct acpi_einj_trigger *trigger_tab = NULL;
-       struct apei_exec_context trigger_ctx;
-       struct apei_resources trigger_resources;
-       struct acpi_whea_header *trigger_entry;
-       struct resource *r;
-       u32 table_size;
-       int rc = -EIO;
-       struct acpi_generic_address *trigger_param_region = NULL;
-
-       r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
-                              "APEI EINJ Trigger Table");
-       if (!r) {
-               pr_err("Can not request [mem %#010llx-%#010llx] for Trigger table\n",
-                      (unsigned long long)trigger_paddr,
-                      (unsigned long long)trigger_paddr +
-                           sizeof(*trigger_tab) - 1);
-               goto out;
-       }
-       trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
-       if (!trigger_tab) {
-               pr_err("Failed to map trigger table!\n");
-               goto out_rel_header;
-       }
-       rc = einj_check_trigger_header(trigger_tab);
-       if (rc) {
-               pr_warn(FW_BUG "Invalid trigger error action table.\n");
-               goto out_rel_header;
-       }
-
-       /* No action structures in the TRIGGER_ERROR table, nothing to do */
-       if (!trigger_tab->entry_count)
-               goto out_rel_header;
-
-       rc = -EIO;
-       table_size = trigger_tab->table_size;
-       r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
-                              table_size - sizeof(*trigger_tab),
-                              "APEI EINJ Trigger Table");
-       if (!r) {
-               pr_err("Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n",
-                      (unsigned long long)trigger_paddr + sizeof(*trigger_tab),
-                      (unsigned long long)trigger_paddr + table_size - 1);
-               goto out_rel_header;
-       }
-       iounmap(trigger_tab);
-       trigger_tab = ioremap_cache(trigger_paddr, table_size);
-       if (!trigger_tab) {
-               pr_err("Failed to map trigger table!\n");
-               goto out_rel_entry;
-       }
-       trigger_entry = (struct acpi_whea_header *)
-               ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
-       apei_resources_init(&trigger_resources);
-       apei_exec_ctx_init(&trigger_ctx, einj_ins_type,
-                          ARRAY_SIZE(einj_ins_type),
-                          trigger_entry, trigger_tab->entry_count);
-       rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources);
-       if (rc)
-               goto out_fini;
-       rc = apei_resources_sub(&trigger_resources, &einj_resources);
-       if (rc)
-               goto out_fini;
-       /*
-        * Some firmware will access target address specified in
-        * param1 to trigger the error when injecting memory error.
-        * This will cause resource conflict with regular memory.  So
-        * remove it from trigger table resources.
-        */
-       if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) {
-               struct apei_resources addr_resources;
-
-               apei_resources_init(&addr_resources);
-               trigger_param_region = einj_get_trigger_parameter_region(
-                       trigger_tab, param1, param2);
-               if (trigger_param_region) {
-                       rc = apei_resources_add(&addr_resources,
-                               trigger_param_region->address,
-                               trigger_param_region->bit_width/8, true);
-                       if (rc)
-                               goto out_fini;
-                       rc = apei_resources_sub(&trigger_resources,
-                                       &addr_resources);
-               }
-               apei_resources_fini(&addr_resources);
-               if (rc)
-                       goto out_fini;
-       }
-       rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger");
-       if (rc)
-               goto out_fini;
-       rc = apei_exec_pre_map_gars(&trigger_ctx);
-       if (rc)
-               goto out_release;
-
-       rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR);
-
-       apei_exec_post_unmap_gars(&trigger_ctx);
-out_release:
-       apei_resources_release(&trigger_resources);
-out_fini:
-       apei_resources_fini(&trigger_resources);
-out_rel_entry:
-       release_mem_region(trigger_paddr + sizeof(*trigger_tab),
-                          table_size - sizeof(*trigger_tab));
-out_rel_header:
-       release_mem_region(trigger_paddr, sizeof(*trigger_tab));
-out:
-       if (trigger_tab)
-               iounmap(trigger_tab);
-
-       return rc;
-}
-
-static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
-                              u64 param3, u64 param4)
-{
-       struct apei_exec_context ctx;
-       u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
-       int rc;
-
-       einj_exec_ctx_init(&ctx);
-
-       rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
-       if (rc)
-               return rc;
-       apei_exec_ctx_set_input(&ctx, type);
-       if (acpi5) {
-               struct set_error_type_with_address *v5param = einj_param;
-
-               v5param->type = type;
-               if (type & ACPI5_VENDOR_BIT) {
-                       switch (vendor_flags) {
-                       case SETWA_FLAGS_APICID:
-                               v5param->apicid = param1;
-                               break;
-                       case SETWA_FLAGS_MEM:
-                               v5param->memory_address = param1;
-                               v5param->memory_address_range = param2;
-                               break;
-                       case SETWA_FLAGS_PCIE_SBDF:
-                               v5param->pcie_sbdf = param1;
-                               break;
-                       }
-                       v5param->flags = vendor_flags;
-               } else if (flags) {
-                       v5param->flags = flags;
-                       v5param->memory_address = param1;
-                       v5param->memory_address_range = param2;
-                       v5param->apicid = param3;
-                       v5param->pcie_sbdf = param4;
-               } else {
-                       switch (type) {
-                       case ACPI_EINJ_PROCESSOR_CORRECTABLE:
-                       case ACPI_EINJ_PROCESSOR_UNCORRECTABLE:
-                       case ACPI_EINJ_PROCESSOR_FATAL:
-                               v5param->apicid = param1;
-                               v5param->flags = SETWA_FLAGS_APICID;
-                               break;
-                       case ACPI_EINJ_MEMORY_CORRECTABLE:
-                       case ACPI_EINJ_MEMORY_UNCORRECTABLE:
-                       case ACPI_EINJ_MEMORY_FATAL:
-                               v5param->memory_address = param1;
-                               v5param->memory_address_range = param2;
-                               v5param->flags = SETWA_FLAGS_MEM;
-                               break;
-                       case ACPI_EINJ_PCIX_CORRECTABLE:
-                       case ACPI_EINJ_PCIX_UNCORRECTABLE:
-                       case ACPI_EINJ_PCIX_FATAL:
-                               v5param->pcie_sbdf = param1;
-                               v5param->flags = SETWA_FLAGS_PCIE_SBDF;
-                               break;
-                       }
-               }
-       } else {
-               rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
-               if (rc)
-                       return rc;
-               if (einj_param) {
-                       struct einj_parameter *v4param = einj_param;
-
-                       v4param->param1 = param1;
-                       v4param->param2 = param2;
-               }
-       }
-       rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
-       if (rc)
-               return rc;
-       for (;;) {
-               rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS);
-               if (rc)
-                       return rc;
-               val = apei_exec_ctx_get_output(&ctx);
-               if (!(val & EINJ_OP_BUSY))
-                       break;
-               if (einj_timedout(&timeout))
-                       return -EIO;
-       }
-       rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS);
-       if (rc)
-               return rc;
-       val = apei_exec_ctx_get_output(&ctx);
-       if (val == EINJ_STATUS_FAIL)
-               return -EBUSY;
-       else if (val == EINJ_STATUS_INVAL)
-               return -EINVAL;
-
-       /*
-        * The error is injected into the platform successfully, then it needs
-        * to trigger the error.
-        */
-       rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE);
-       if (rc)
-               return rc;
-       trigger_paddr = apei_exec_ctx_get_output(&ctx);
-       if (notrigger == 0) {
-               rc = __einj_error_trigger(trigger_paddr, type, param1, param2);
-               if (rc)
-                       return rc;
-       }
-       rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
-
-       return rc;
-}
-
-/* Inject the specified hardware error */
-static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
-                            u64 param3, u64 param4)
-{
-       int rc;
-       u64 base_addr, size;
-
-       /* If user manually set "flags", make sure it is legal */
-       if (flags && (flags &
-               ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
-               return -EINVAL;
-
-       /*
-        * We need extra sanity checks for memory errors.
-        * Other types leap directly to injection.
-        */
-
-       /* ensure param1/param2 existed */
-       if (!(param_extension || acpi5))
-               goto inject;
-
-       /* ensure injection is memory related */
-       if (type & ACPI5_VENDOR_BIT) {
-               if (vendor_flags != SETWA_FLAGS_MEM)
-                       goto inject;
-       } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
-               goto inject;
-
-       /*
-        * Disallow crazy address masks that give BIOS leeway to pick
-        * injection address almost anywhere. Insist on page or
-        * better granularity and that target address is normal RAM or
-        * NVDIMM.
-        */
-       base_addr = param1 & param2;
-       size = ~param2 + 1;
-
-       if (((param2 & PAGE_MASK) != PAGE_MASK) ||
-           ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
-                               != REGION_INTERSECTS) &&
-            (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
-                               != REGION_INTERSECTS) &&
-            (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED)
-                               != REGION_INTERSECTS) &&
-            !arch_is_platform_page(base_addr)))
-               return -EINVAL;
-
-       if (is_zero_pfn(base_addr >> PAGE_SHIFT))
-               return -EADDRINUSE;
-
-inject:
-       mutex_lock(&einj_mutex);
-       rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
-       mutex_unlock(&einj_mutex);
-
-       return rc;
-}
-
-static u32 error_type;
-static u32 error_flags;
-static u64 error_param1;
-static u64 error_param2;
-static u64 error_param3;
-static u64 error_param4;
-static struct dentry *einj_debug_dir;
-static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
-       { BIT(0), "Processor Correctable" },
-       { BIT(1), "Processor Uncorrectable non-fatal" },
-       { BIT(2), "Processor Uncorrectable fatal" },
-       { BIT(3), "Memory Correctable" },
-       { BIT(4), "Memory Uncorrectable non-fatal" },
-       { BIT(5), "Memory Uncorrectable fatal" },
-       { BIT(6), "PCI Express Correctable" },
-       { BIT(7), "PCI Express Uncorrectable non-fatal" },
-       { BIT(8), "PCI Express Uncorrectable fatal" },
-       { BIT(9), "Platform Correctable" },
-       { BIT(10), "Platform Uncorrectable non-fatal" },
-       { BIT(11), "Platform Uncorrectable fatal"},
-       { BIT(12), "CXL.cache Protocol Correctable" },
-       { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" },
-       { BIT(14), "CXL.cache Protocol Uncorrectable fatal" },
-       { BIT(15), "CXL.mem Protocol Correctable" },
-       { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" },
-       { BIT(17), "CXL.mem Protocol Uncorrectable fatal" },
-       { BIT(31), "Vendor Defined Error Types" },
-};
-
-static int available_error_type_show(struct seq_file *m, void *v)
-{
-       int rc;
-       u32 error_type = 0;
-
-       rc = einj_get_available_error_type(&error_type);
-       if (rc)
-               return rc;
-       for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++)
-               if (error_type & einj_error_type_string[pos].mask)
-                       seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
-                                  einj_error_type_string[pos].str);
-
-       return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(available_error_type);
-
-static int error_type_get(void *data, u64 *val)
-{
-       *val = error_type;
-
-       return 0;
-}
-
-static int error_type_set(void *data, u64 val)
-{
-       int rc;
-       u32 available_error_type = 0;
-       u32 tval, vendor;
-
-       /* Only low 32 bits for error type are valid */
-       if (val & GENMASK_ULL(63, 32))
-               return -EINVAL;
-
-       /*
-        * Vendor defined types have 0x80000000 bit set, and
-        * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
-        */
-       vendor = val & ACPI5_VENDOR_BIT;
-       tval = val & 0x7fffffff;
-
-       /* Only one error type can be specified */
-       if (tval & (tval - 1))
-               return -EINVAL;
-       if (!vendor) {
-               rc = einj_get_available_error_type(&available_error_type);
-               if (rc)
-                       return rc;
-               if (!(val & available_error_type))
-                       return -EINVAL;
-       }
-       error_type = val;
-
-       return 0;
-}
-
-DEFINE_DEBUGFS_ATTRIBUTE(error_type_fops, error_type_get, error_type_set,
-                        "0x%llx\n");
-
-static int error_inject_set(void *data, u64 val)
-{
-       if (!error_type)
-               return -EINVAL;
-
-       return einj_error_inject(error_type, error_flags, error_param1, error_param2,
-               error_param3, error_param4);
-}
-
-DEFINE_DEBUGFS_ATTRIBUTE(error_inject_fops, NULL, error_inject_set, "%llu\n");
-
-static int einj_check_table(struct acpi_table_einj *einj_tab)
-{
-       if ((einj_tab->header_length !=
-            (sizeof(struct acpi_table_einj) - sizeof(einj_tab->header)))
-           && (einj_tab->header_length != sizeof(struct acpi_table_einj)))
-               return -EINVAL;
-       if (einj_tab->header.length < sizeof(struct acpi_table_einj))
-               return -EINVAL;
-       if (einj_tab->entries !=
-           (einj_tab->header.length - sizeof(struct acpi_table_einj)) /
-           sizeof(struct acpi_einj_entry))
-               return -EINVAL;
-
-       return 0;
-}
-
-static int __init einj_init(void)
-{
-       int rc;
-       acpi_status status;
-       struct apei_exec_context ctx;
-
-       if (acpi_disabled) {
-               pr_info("ACPI disabled.\n");
-               return -ENODEV;
-       }
-
-       status = acpi_get_table(ACPI_SIG_EINJ, 0,
-                               (struct acpi_table_header **)&einj_tab);
-       if (status == AE_NOT_FOUND) {
-               pr_warn("EINJ table not found.\n");
-               return -ENODEV;
-       } else if (ACPI_FAILURE(status)) {
-               pr_err("Failed to get EINJ table: %s\n",
-                               acpi_format_exception(status));
-               return -EINVAL;
-       }
-
-       rc = einj_check_table(einj_tab);
-       if (rc) {
-               pr_warn(FW_BUG "Invalid EINJ table.\n");
-               goto err_put_table;
-       }
-
-       rc = -ENOMEM;
-       einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
-
-       debugfs_create_file("available_error_type", S_IRUSR, einj_debug_dir,
-                           NULL, &available_error_type_fops);
-       debugfs_create_file_unsafe("error_type", 0600, einj_debug_dir,
-                                  NULL, &error_type_fops);
-       debugfs_create_file_unsafe("error_inject", 0200, einj_debug_dir,
-                                  NULL, &error_inject_fops);
-
-       apei_resources_init(&einj_resources);
-       einj_exec_ctx_init(&ctx);
-       rc = apei_exec_collect_resources(&ctx, &einj_resources);
-       if (rc) {
-               pr_err("Error collecting EINJ resources.\n");
-               goto err_fini;
-       }
-
-       rc = apei_resources_request(&einj_resources, "APEI EINJ");
-       if (rc) {
-               pr_err("Error requesting memory/port resources.\n");
-               goto err_fini;
-       }
-
-       rc = apei_exec_pre_map_gars(&ctx);
-       if (rc) {
-               pr_err("Error pre-mapping GARs.\n");
-               goto err_release;
-       }
-
-       einj_param = einj_get_parameter_address();
-       if ((param_extension || acpi5) && einj_param) {
-               debugfs_create_x32("flags", S_IRUSR | S_IWUSR, einj_debug_dir,
-                                  &error_flags);
-               debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir,
-                                  &error_param1);
-               debugfs_create_x64("param2", S_IRUSR | S_IWUSR, einj_debug_dir,
-                                  &error_param2);
-               debugfs_create_x64("param3", S_IRUSR | S_IWUSR, einj_debug_dir,
-                                  &error_param3);
-               debugfs_create_x64("param4", S_IRUSR | S_IWUSR, einj_debug_dir,
-                                  &error_param4);
-               debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
-                                  einj_debug_dir, &notrigger);
-       }
-
-       if (vendor_dev[0]) {
-               vendor_blob.data = vendor_dev;
-               vendor_blob.size = strlen(vendor_dev);
-               debugfs_create_blob("vendor", S_IRUSR, einj_debug_dir,
-                                   &vendor_blob);
-               debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR,
-                                  einj_debug_dir, &vendor_flags);
-       }
-
-       if (vendor_errors.size)
-               debugfs_create_blob("oem_error", 0600, einj_debug_dir,
-                                   &vendor_errors);
-
-       pr_info("Error INJection is initialized.\n");
-
-       return 0;
-
-err_release:
-       apei_resources_release(&einj_resources);
-err_fini:
-       apei_resources_fini(&einj_resources);
-       debugfs_remove_recursive(einj_debug_dir);
-err_put_table:
-       acpi_put_table((struct acpi_table_header *)einj_tab);
-
-       return rc;
-}
-
-static void __exit einj_exit(void)
-{
-       struct apei_exec_context ctx;
-
-       if (einj_param) {
-               acpi_size size = (acpi5) ?
-                       sizeof(struct set_error_type_with_address) :
-                       sizeof(struct einj_parameter);
-
-               acpi_os_unmap_iomem(einj_param, size);
-               if (vendor_errors.size)
-                       acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
-       }
-       einj_exec_ctx_init(&ctx);
-       apei_exec_post_unmap_gars(&ctx);
-       apei_resources_release(&einj_resources);
-       apei_resources_fini(&einj_resources);
-       debugfs_remove_recursive(einj_debug_dir);
-       acpi_put_table((struct acpi_table_header *)einj_tab);
-}
-
-module_init(einj_init);
-module_exit(einj_exit);
-
-MODULE_AUTHOR("Huang Ying");
-MODULE_DESCRIPTION("APEI Error INJection support");
-MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c

index d6b85f0..2c8ccc9 100644 (file)
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -59,9 +59,8 @@ struct target_cache {
  };
  
  enum {
-       NODE_ACCESS_CLASS_0 = 0,
-       NODE_ACCESS_CLASS_1,
-       NODE_ACCESS_CLASS_GENPORT_SINK,
+       NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL = ACCESS_COORDINATE_MAX,
+       NODE_ACCESS_CLASS_GENPORT_SINK_CPU,
         NODE_ACCESS_CLASS_MAX,
  };
  
@@ -75,6 +74,7 @@ struct memory_target {
         struct node_cache_attrs cache_attrs;
         u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE];
         bool registered;
+       bool ext_updated;       /* externally updated */
  };
  
  struct memory_initiator {
@@ -127,7 +127,8 @@ static struct memory_target *acpi_find_genport_target(u32 uid)
  /**
   * acpi_get_genport_coordinates - Retrieve the access coordinates for a generic port
   * @uid: ACPI unique id
- * @coord: The access coordinates written back out for the generic port
+ * @coord: The access coordinates written back out for the generic port.
+ *        Expect 2 levels array.
   *
   * Return: 0 on success. Errno on failure.
   *
@@ -143,7 +144,10 @@ int acpi_get_genport_coordinates(u32 uid,
         if (!target)
                 return -ENOENT;
  
-       *coord = target->coord[NODE_ACCESS_CLASS_GENPORT_SINK];
+       coord[ACCESS_COORDINATE_LOCAL] =
+               target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL];
+       coord[ACCESS_COORDINATE_CPU] =
+               target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_CPU];
  
         return 0;
  }
@@ -325,6 +329,35 @@ static void hmat_update_target_access(struct memory_target *target,
         }
  }
  
+int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
+                                  enum access_coordinate_class access)
+{
+       struct memory_target *target;
+       int pxm;
+
+       if (nid == NUMA_NO_NODE)
+               return -EINVAL;
+
+       pxm = node_to_pxm(nid);
+       guard(mutex)(&target_lock);
+       target = find_mem_target(pxm);
+       if (!target)
+               return -ENODEV;
+
+       hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY,
+                                 coord->read_latency, access);
+       hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY,
+                                 coord->write_latency, access);
+       hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH,
+                                 coord->read_bandwidth, access);
+       hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH,
+                                 coord->write_bandwidth, access);
+       target->ext_updated = true;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(hmat_update_target_coordinates);
+
  static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc)
  {
         struct memory_locality *loc;
@@ -374,11 +407,11 @@ static __init void hmat_update_target(unsigned int tgt_pxm, unsigned int init_px
  
         if (target && target->processor_pxm == init_pxm) {
                 hmat_update_target_access(target, type, value,
-                                         NODE_ACCESS_CLASS_0);
+                                         ACCESS_COORDINATE_LOCAL);
                 /* If the node has a CPU, update access 1 */
                 if (node_state(pxm_to_node(init_pxm), N_CPU))
                         hmat_update_target_access(target, type, value,
-                                                 NODE_ACCESS_CLASS_1);
+                                                 ACCESS_COORDINATE_CPU);
         }
  }
  
@@ -696,8 +729,13 @@ static void hmat_update_target_attrs(struct memory_target *target,
         u32 best = 0;
         int i;
  
+       /* Don't update if an external agent has changed the data.  */
+       if (target->ext_updated)
+               return;
+
         /* Don't update for generic port if there's no device handle */
-       if (access == NODE_ACCESS_CLASS_GENPORT_SINK &&
+       if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL ||
+            access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
             !(*(u16 *)target->gen_port_device_handle))
                 return;
  
@@ -709,7 +747,8 @@ static void hmat_update_target_attrs(struct memory_target *target,
          */
         if (target->processor_pxm != PXM_INVAL) {
                 cpu_nid = pxm_to_node(target->processor_pxm);
-               if (access == 0 || node_state(cpu_nid, N_CPU)) {
+               if (access == ACCESS_COORDINATE_LOCAL ||
+                   node_state(cpu_nid, N_CPU)) {
                         set_bit(target->processor_pxm, p_nodes);
                         return;
                 }
@@ -737,7 +776,9 @@ static void hmat_update_target_attrs(struct memory_target *target,
                 list_for_each_entry(initiator, &initiators, node) {
                         u32 value;
  
-                       if (access == 1 && !initiator->has_cpu) {
+                       if ((access == ACCESS_COORDINATE_CPU ||
+                            access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
+                           !initiator->has_cpu) {
                                 clear_bit(initiator->processor_pxm, p_nodes);
                                 continue;
                         }
@@ -770,20 +811,24 @@ static void __hmat_register_target_initiators(struct memory_target *target,
         }
  }
  
-static void hmat_register_generic_target_initiators(struct memory_target *target)
+static void hmat_update_generic_target(struct memory_target *target)
  {
         static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
  
-       __hmat_register_target_initiators(target, p_nodes,
-                                         NODE_ACCESS_CLASS_GENPORT_SINK);
+       hmat_update_target_attrs(target, p_nodes,
+                                NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL);
+       hmat_update_target_attrs(target, p_nodes,
+                                NODE_ACCESS_CLASS_GENPORT_SINK_CPU);
  }
  
  static void hmat_register_target_initiators(struct memory_target *target)
  {
         static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
  
-       __hmat_register_target_initiators(target, p_nodes, 0);
-       __hmat_register_target_initiators(target, p_nodes, 1);
+       __hmat_register_target_initiators(target, p_nodes,
+                                         ACCESS_COORDINATE_LOCAL);
+       __hmat_register_target_initiators(target, p_nodes,
+                                         ACCESS_COORDINATE_CPU);
  }
  
  static void hmat_register_target_cache(struct memory_target *target)
@@ -835,7 +880,7 @@ static void hmat_register_target(struct memory_target *target)
          */
         mutex_lock(&target_lock);
         if (*(u16 *)target->gen_port_device_handle) {
-               hmat_register_generic_target_initiators(target);
+               hmat_update_generic_target(target);
                 target->registered = true;
         }
         mutex_unlock(&target_lock);
@@ -854,8 +899,8 @@ static void hmat_register_target(struct memory_target *target)
         if (!target->registered) {
                 hmat_register_target_initiators(target);
                 hmat_register_target_cache(target);
-               hmat_register_target_perf(target, NODE_ACCESS_CLASS_0);
-               hmat_register_target_perf(target, NODE_ACCESS_CLASS_1);
+               hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL);
+               hmat_register_target_perf(target, ACCESS_COORDINATE_CPU);
                 target->registered = true;
         }
         mutex_unlock(&target_lock);
@@ -927,7 +972,7 @@ static int hmat_calculate_adistance(struct notifier_block *self,
                 return NOTIFY_OK;
  
         mutex_lock(&target_lock);
-       hmat_update_target_attrs(target, p_nodes, 1);
+       hmat_update_target_attrs(target, p_nodes, ACCESS_COORDINATE_CPU);
         mutex_unlock(&target_lock);
  
         perf = &target->coord[1];
diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c

index 0214518..e45e649 100644 (file)
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -29,6 +29,8 @@ static int node_to_pxm_map[MAX_NUMNODES]
  unsigned char acpi_srat_revision __initdata;
  static int acpi_numa __initdata;
  
+static int last_real_pxm;
+
  void __init disable_srat(void)
  {
         acpi_numa = -1;
@@ -536,6 +538,7 @@ int __init acpi_numa_init(void)
                 if (node_to_pxm_map[i] > fake_pxm)
                         fake_pxm = node_to_pxm_map[i];
         }
+       last_real_pxm = fake_pxm;
         fake_pxm++;
         acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
                               &fake_pxm);
@@ -547,6 +550,14 @@ int __init acpi_numa_init(void)
         return 0;
  }
  
+bool acpi_node_backed_by_real_pxm(int nid)
+{
+       int pxm = node_to_pxm(nid);
+
+       return pxm <= last_real_pxm;
+}
+EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);
+
  static int acpi_get_pxm(acpi_handle h)
  {
         unsigned long long pxm;
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c

index b07f7d0..b976e5f 100644 (file)
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -253,7 +253,7 @@ int __init_or_acpilib acpi_table_parse_entries_array(
  
         count = acpi_parse_entries_array(id, table_size,
                                          (union fw_table_header *)table_header,
-                                        proc, proc_num, max_entries);
+                                        0, proc, proc_num, max_entries);
  
         acpi_put_table(table_header);
         return count;
diff --git a/drivers/base/node.c b/drivers/base/node.c

index 1c05640..eb72580 100644 (file)
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -126,7 +126,7 @@ static void node_access_release(struct device *dev)
  }
  
  static struct node_access_nodes *node_init_node_access(struct node *node,
-                                                      unsigned int access)
+                                                      enum access_coordinate_class access)
  {
         struct node_access_nodes *access_node;
         struct device *dev;
@@ -191,7 +191,7 @@ static struct attribute *access_attrs[] = {
   * @access: The access class the for the given attributes
   */
  void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
-                        unsigned int access)
+                        enum access_coordinate_class access)
  {
         struct node_access_nodes *c;
         struct node *node;
@@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
                 }
         }
  }
+EXPORT_SYMBOL_GPL(node_set_perf_attrs);
  
  /**
   * struct node_cache_info - Internal tracking for memory node caches
@@ -689,7 +690,7 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
   */
  int register_memory_node_under_compute_node(unsigned int mem_nid,
                                             unsigned int cpu_nid,
-                                           unsigned int access)
+                                           enum access_coordinate_class access)
  {
         struct node *init_node, *targ_node;
         struct node_access_nodes *initiator, *target;
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c

index 1a3e6aa..af5cb81 100644 (file)
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -530,13 +530,15 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
         if (kstrtou32(acpi_device_uid(hb), 0, &uid))
                 return -EINVAL;
  
-       rc = acpi_get_genport_coordinates(uid, &dport->hb_coord);
+       rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
         if (rc < 0)
                 return rc;
  
         /* Adjust back to picoseconds from nanoseconds */
-       dport->hb_coord.read_latency *= 1000;
-       dport->hb_coord.write_latency *= 1000;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               dport->hb_coord[i].read_latency *= 1000;
+               dport->hb_coord[i].write_latency *= 1000;
+       }
  
         return 0;
  }
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c

index 08fd0ba..eddbbe2 100644 (file)
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -9,6 +9,7 @@
  #include "cxlmem.h"
  #include "core.h"
  #include "cxl.h"
+#include "core.h"
  
  struct dsmas_entry {
         struct range dpa_range;
@@ -149,28 +150,35 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
         int rc;
  
         rc = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler,
-                             dsmas_xa, port->cdat.table);
+                             dsmas_xa, port->cdat.table, port->cdat.length);
         rc = cdat_table_parse_output(rc);
         if (rc)
                 return rc;
  
         rc = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler,
-                             dsmas_xa, port->cdat.table);
+                             dsmas_xa, port->cdat.table, port->cdat.length);
         return cdat_table_parse_output(rc);
  }
  
  static int cxl_port_perf_data_calculate(struct cxl_port *port,
                                         struct xarray *dsmas_xa)
  {
-       struct access_coordinate c;
+       struct access_coordinate ep_c;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
         struct dsmas_entry *dent;
         int valid_entries = 0;
         unsigned long index;
         int rc;
  
-       rc = cxl_endpoint_get_perf_coordinates(port, &c);
+       rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
         if (rc) {
-               dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n");
+               dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
+               return rc;
+       }
+
+       rc = cxl_hb_get_perf_coordinates(port, coord);
+       if (rc)  {
+               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
                 return rc;
         }
  
@@ -185,18 +193,19 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
         xa_for_each(dsmas_xa, index, dent) {
                 int qos_class;
  
-               dent->coord.read_latency = dent->coord.read_latency +
-                                          c.read_latency;
-               dent->coord.write_latency = dent->coord.write_latency +
-                                           c.write_latency;
-               dent->coord.read_bandwidth = min_t(int, c.read_bandwidth,
-                                                  dent->coord.read_bandwidth);
-               dent->coord.write_bandwidth = min_t(int, c.write_bandwidth,
-                                                   dent->coord.write_bandwidth);
-
+               cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
+               /*
+                * Keeping the host bridge coordinates separate from the dsmas
+                * coordinates in order to allow calculation of access class
+                * 0 and 1 for region later.
+                */
+               cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
+                                       &coord[ACCESS_COORDINATE_CPU],
+                                       &dent->coord);
                 dent->entries = 1;
-               rc = cxl_root->ops->qos_class(cxl_root, &dent->coord, 1,
-                                             &qos_class);
+               rc = cxl_root->ops->qos_class(cxl_root,
+                                             &coord[ACCESS_COORDINATE_CPU],
+                                             1, &qos_class);
                 if (rc != 1)
                         continue;
  
@@ -389,36 +398,38 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);
  static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
                                const unsigned long end)
  {
+       struct acpi_cdat_sslbis_table {
+               struct acpi_cdat_header header;
+               struct acpi_cdat_sslbis sslbis_header;
+               struct acpi_cdat_sslbe entries[];
+       } *tbl = (struct acpi_cdat_sslbis_table *)header;
+       int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header);
         struct acpi_cdat_sslbis *sslbis;
-       int size = sizeof(header->cdat) + sizeof(*sslbis);
         struct cxl_port *port = arg;
         struct device *dev = &port->dev;
-       struct acpi_cdat_sslbe *entry;
         int remain, entries, i;
         u16 len;
  
         len = le16_to_cpu((__force __le16)header->cdat.length);
         remain = len - size;
-       if (!remain || remain % sizeof(*entry) ||
+       if (!remain || remain % sizeof(tbl->entries[0]) ||
             (unsigned long)header + len > end) {
                 dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len);
                 return -EINVAL;
         }
  
-       /* Skip common header */
-       sslbis = (struct acpi_cdat_sslbis *)((unsigned long)header +
-                                            sizeof(header->cdat));
-
+       sslbis = &tbl->sslbis_header;
         /* Unrecognized data type, we can skip */
         if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH)
                 return 0;
  
-       entries = remain / sizeof(*entry);
-       entry = (struct acpi_cdat_sslbe *)((unsigned long)header + sizeof(*sslbis));
+       entries = remain / sizeof(tbl->entries[0]);
+       if (struct_size(tbl, entries, entries) != len)
+               return -EINVAL;
  
         for (i = 0; i < entries; i++) {
-               u16 x = le16_to_cpu((__force __le16)entry->portx_id);
-               u16 y = le16_to_cpu((__force __le16)entry->porty_id);
+               u16 x = le16_to_cpu((__force __le16)tbl->entries[i].portx_id);
+               u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id);
                 __le64 le_base;
                 __le16 le_val;
                 struct cxl_dport *dport;
@@ -448,8 +459,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
                         break;
                 }
  
-               le_base = (__force __le64)sslbis->entry_base_unit;
-               le_val = (__force __le16)entry->latency_or_bandwidth;
+               le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
+               le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
  
                 if (check_mul_overflow(le64_to_cpu(le_base),
                                        le16_to_cpu(le_val), &val))
@@ -462,8 +473,6 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
                                                           sslbis->data_type,
                                                           val);
                 }
-
-               entry++;
         }
  
         return 0;
@@ -477,11 +486,108 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
                 return;
  
         rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler,
-                             port, port->cdat.table);
+                             port, port->cdat.table, port->cdat.length);
         rc = cdat_table_parse_output(rc);
         if (rc)
                 dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc);
  }
  EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
  
+/**
+ * cxl_coordinates_combine - Fold two access coordinates into a single one
+ *
+ * @out: Destination for the combination of @c1 and @c2
+ * @c1: first input coordinate
+ * @c2: second input coordinate
+ *
+ * Latencies are added together. A bandwidth member of @out is only written
+ * when both inputs carry a non-zero value for it, in which case the smaller
+ * of the two is stored; otherwise that member of @out is left as it was.
+ */
+void cxl_coordinates_combine(struct access_coordinate *out,
+                            struct access_coordinate *c1,
+                            struct access_coordinate *c2)
+{
+       /* Latencies are summed unconditionally */
+       out->read_latency = c1->read_latency + c2->read_latency;
+       out->write_latency = c1->write_latency + c2->write_latency;
+
+       /* Bandwidths: keep the smaller one, but only when both are non-zero */
+       if (c1->read_bandwidth && c2->read_bandwidth)
+               out->read_bandwidth = min(c1->read_bandwidth,
+                                         c2->read_bandwidth);
+
+       if (c1->write_bandwidth && c2->write_bandwidth)
+               out->write_bandwidth = min(c1->write_bandwidth,
+                                          c2->write_bandwidth);
+}
+
  MODULE_IMPORT_NS(CXL);
+
+/**
+ * cxl_region_perf_data_calculate - Account an endpoint's performance data
+ *                                  to its region's access coordinates
+ *
+ * @cxlr: region whose ->coord[] entries are updated
+ * @cxled: endpoint decoder being accounted to @cxlr
+ *
+ * For every access class the host bridge coordinates are combined with the
+ * coordinates of the memdev partition selected by @cxlr->mode (RAM or PMEM).
+ * The region keeps the largest latency seen so far and the sum of all
+ * bandwidths. Nothing is updated when the region mode is neither RAM nor
+ * PMEM, when the decoder's DPA range is not contained in the partition's
+ * perf range, or when the host bridge coordinates cannot be retrieved.
+ *
+ * Must be called with cxl_dpa_rwsem held.
+ */
+void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
+                                   struct cxl_endpoint_decoder *cxled)
+{
+       struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+       struct cxl_port *port = cxlmd->endpoint;
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+       struct range dpa = {
+               .start = cxled->dpa_res->start,
+               .end = cxled->dpa_res->end,
+       };
+       struct cxl_dpa_perf *perf;
+       int rc;
+
+       switch (cxlr->mode) {
+       case CXL_DECODER_RAM:
+               perf = &mds->ram_perf;
+               break;
+       case CXL_DECODER_PMEM:
+               perf = &mds->pmem_perf;
+               break;
+       default:
+               return;
+       }
+
+       lockdep_assert_held(&cxl_dpa_rwsem);
+
+       if (!range_contains(&perf->dpa_range, &dpa))
+               return;
+
+       rc = cxl_hb_get_perf_coordinates(port, hb_coord);
+       if (rc)  {
+               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
+               return;
+       }
+
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               /*
+                * Start from zero on every pass: cxl_coordinates_combine()
+                * leaves a bandwidth member unwritten when either input is
+                * zero, so stale or uninitialized data must not leak in.
+                */
+               struct access_coordinate coord = { 0 };
+               unsigned int read_lat_ns, write_lat_ns;
+
+               /* Pickup the host bridge coords */
+               cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
+
+               /*
+                * Convert latency to nanosec from picosec to be consistent
+                * with the resulting latency coordinates computed by the
+                * HMAT_REPORTING code. This is done before the comparison
+                * below because the values already accumulated in
+                * cxlr->coord[] are in nanoseconds.
+                */
+               read_lat_ns = DIV_ROUND_UP(coord.read_latency, 1000);
+               write_lat_ns = DIV_ROUND_UP(coord.write_latency, 1000);
+
+               /* Get total bandwidth and the worst latency for the cxl region */
+               cxlr->coord[i].read_latency = max_t(unsigned int,
+                                                   cxlr->coord[i].read_latency,
+                                                   read_lat_ns);
+               cxlr->coord[i].write_latency = max_t(unsigned int,
+                                                    cxlr->coord[i].write_latency,
+                                                    write_lat_ns);
+               cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
+               cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
+       }
+}
+
+/*
+ * Pass the region's access coordinate for class @access to
+ * hmat_update_target_coordinates() for node @nid and return its result.
+ */
+int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
+                                      enum access_coordinate_class access)
+{
+       return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
+}
+
+/*
+ * Returns true when acpi_node_backed_by_real_pxm() reports false for @nid.
+ * cxl_region_update_coordinates() uses this to choose between setting the
+ * node's perf attributes directly and updating the HMAT target coordinates.
+ */
+bool cxl_need_node_perf_attrs_update(int nid)
+{
+       return !acpi_node_backed_by_real_pxm(nid);
+}
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h

index 3b64fb1..bc5a956 100644 (file)
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -90,4 +90,8 @@ enum cxl_poison_trace_type {
  
  long cxl_pci_get_latency(struct pci_dev *pdev);
  
+int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
+                                      enum access_coordinate_class access);
+bool cxl_need_node_perf_attrs_update(int nid);
+
  #endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c

index e9e6c81..0df09bd 100644 (file)
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -518,14 +518,14 @@ EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
          FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
  
  static int cxl_cdat_get_length(struct device *dev,
-                              struct pci_doe_mb *cdat_doe,
+                              struct pci_doe_mb *doe_mb,
                                size_t *length)
  {
         __le32 request = CDAT_DOE_REQ(0);
         __le32 response[2];
         int rc;
  
-       rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL,
+       rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
                      CXL_DOE_PROTOCOL_TABLE_ACCESS,
                      &request, sizeof(request),
                      &response, sizeof(response));
@@ -543,56 +543,58 @@ static int cxl_cdat_get_length(struct device *dev,
  }
  
  static int cxl_cdat_read_table(struct device *dev,
-                              struct pci_doe_mb *cdat_doe,
-                              void *cdat_table, size_t *cdat_length)
+                              struct pci_doe_mb *doe_mb,
+                              struct cdat_doe_rsp *rsp, size_t *length)
  {
-       size_t length = *cdat_length + sizeof(__le32);
-       __le32 *data = cdat_table;
-       int entry_handle = 0;
+       size_t received, remaining = *length;
+       unsigned int entry_handle = 0;
+       union cdat_data *data;
         __le32 saved_dw = 0;
  
         do {
                 __le32 request = CDAT_DOE_REQ(entry_handle);
-               struct cdat_entry_header *entry;
-               size_t entry_dw;
                 int rc;
  
-               rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL,
+               rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
                              CXL_DOE_PROTOCOL_TABLE_ACCESS,
                              &request, sizeof(request),
-                            data, length);
+                            rsp, sizeof(*rsp) + remaining);
                 if (rc < 0) {
                         dev_err(dev, "DOE failed: %d", rc);
                         return rc;
                 }
  
-               /* 1 DW Table Access Response Header + CDAT entry */
-               entry = (struct cdat_entry_header *)(data + 1);
-               if ((entry_handle == 0 &&
-                    rc != sizeof(__le32) + sizeof(struct cdat_header)) ||
-                   (entry_handle > 0 &&
-                    (rc < sizeof(__le32) + sizeof(*entry) ||
-                     rc != sizeof(__le32) + le16_to_cpu(entry->length))))
+               if (rc < sizeof(*rsp))
                         return -EIO;
  
+               data = (union cdat_data *)rsp->data;
+               received = rc - sizeof(*rsp);
+
+               if (entry_handle == 0) {
+                       if (received != sizeof(data->header))
+                               return -EIO;
+               } else {
+                       if (received < sizeof(data->entry) ||
+                           received != le16_to_cpu(data->entry.length))
+                               return -EIO;
+               }
+
                 /* Get the CXL table access header entry handle */
                 entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
-                                        le32_to_cpu(data[0]));
-               entry_dw = rc / sizeof(__le32);
-               /* Skip Header */
-               entry_dw -= 1;
+                                        le32_to_cpu(rsp->doe_header));
+
                 /*
                  * Table Access Response Header overwrote the last DW of
                  * previous entry, so restore that DW
                  */
-               *data = saved_dw;
-               length -= entry_dw * sizeof(__le32);
-               data += entry_dw;
-               saved_dw = *data;
+               rsp->doe_header = saved_dw;
+               remaining -= received;
+               rsp = (void *)rsp + received;
+               saved_dw = rsp->doe_header;
         } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
  
         /* Length in CDAT header may exceed concatenation of CDAT entries */
-       *cdat_length -= length - sizeof(__le32);
+       *length -= remaining;
  
         return 0;
  }
@@ -617,11 +619,11 @@ void read_cdat_data(struct cxl_port *port)
  {
         struct device *uport = port->uport_dev;
         struct device *dev = &port->dev;
-       struct pci_doe_mb *cdat_doe;
+       struct pci_doe_mb *doe_mb;
         struct pci_dev *pdev = NULL;
         struct cxl_memdev *cxlmd;
-       size_t cdat_length;
-       void *cdat_table, *cdat_buf;
+       struct cdat_doe_rsp *buf;
+       size_t table_length, length;
         int rc;
  
         if (is_cxl_memdev(uport)) {
@@ -638,39 +640,48 @@ void read_cdat_data(struct cxl_port *port)
         if (!pdev)
                 return;
  
-       cdat_doe = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL,
-                                       CXL_DOE_PROTOCOL_TABLE_ACCESS);
-       if (!cdat_doe) {
+       doe_mb = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL,
+                                     CXL_DOE_PROTOCOL_TABLE_ACCESS);
+       if (!doe_mb) {
                 dev_dbg(dev, "No CDAT mailbox\n");
                 return;
         }
  
         port->cdat_available = true;
  
-       if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
+       if (cxl_cdat_get_length(dev, doe_mb, &length)) {
                 dev_dbg(dev, "No CDAT length\n");
                 return;
         }
  
-       cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32), GFP_KERNEL);
-       if (!cdat_buf)
-               return;
+       /*
+        * The beginning of the CDAT buffer needs space for an additional
+        * 4 bytes for the DOE header. Table data starts afterwards.
+        */
+       buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
+       if (!buf)
+               goto err;
+
+       table_length = length;
  
-       rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length);
+       rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
         if (rc)
                 goto err;
  
-       cdat_table = cdat_buf + sizeof(__le32);
-       if (cdat_checksum(cdat_table, cdat_length))
+       if (table_length != length)
+               dev_warn(dev, "Malformed CDAT table length (%zu:%zu), discarding trailing data\n",
+                       table_length, length);
+
+       if (cdat_checksum(buf->data, length))
                 goto err;
  
-       port->cdat.table = cdat_table;
-       port->cdat.length = cdat_length;
-       return;
+       port->cdat.table = buf->data;
+       port->cdat.length = length;
  
+       return;
  err:
         /* Don't leave table data allocated on error */
-       devm_kfree(dev, cdat_buf);
+       devm_kfree(dev, buf);
         dev_err(dev, "Failed to read/validate CDAT.\n");
  }
  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c

index e59d9d3..2b0cab5 100644 (file)
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -3,6 +3,7 @@
  #include <linux/platform_device.h>
  #include <linux/memregion.h>
  #include <linux/workqueue.h>
+#include <linux/einj-cxl.h>
  #include <linux/debugfs.h>
  #include <linux/device.h>
  #include <linux/module.h>
@@ -793,6 +794,40 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
         return rc;
  }
  
+DEFINE_SHOW_ATTRIBUTE(einj_cxl_available_error_type);
+
+/*
+ * Set handler behind a dport's "einj_inject" debugfs file: injects the CXL
+ * protocol error @type into the dport passed as @data. RCH dports are
+ * addressed by their RCRB base, all other dports by their PCI device.
+ *
+ * Returns whatever the einj_cxl_inject_*() helper returns.
+ */
+static int cxl_einj_inject(void *data, u64 type)
+{
+       struct cxl_dport *dport = data;
+
+       if (dport->rch)
+               return einj_cxl_inject_rch_error(dport->rcrb.base, type);
+
+       return einj_cxl_inject_error(to_pci_dev(dport->dport_dev), type);
+}
+/* Only cxl_einj_inject is supplied as a handler; the other slot is NULL */
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject,
+                        "0x%llx\n");
+
+/*
+ * Create the debugfs directory named after the dport's device and place the
+ * "einj_inject" file in it. Does nothing unless EINJ CXL support is
+ * initialized and the dport is either an RCH dport or a PCI device.
+ */
+static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
+{
+       struct device *dport_dev = dport->dport_dev;
+
+       if (!einj_cxl_is_initialized())
+               return;
+
+       /*
+        * dport_dev needs to be a PCIe port for CXL 2.0+ ports because
+        * EINJ expects a dport SBDF to be specified for 2.0 error injection.
+        */
+       if (dport->rch || dev_is_pci(dport_dev)) {
+               struct dentry *dport_dir;
+
+               dport_dir = cxl_debugfs_create_dir(dev_name(dport_dev));
+               debugfs_create_file("einj_inject", 0200, dport_dir, dport,
+                                   &cxl_einj_inject_fops);
+       }
+}
+
  static struct cxl_port *__devm_cxl_add_port(struct device *host,
                                             struct device *uport_dev,
                                             resource_size_t component_reg_phys,
@@ -822,6 +857,7 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host,
                  */
                 port->reg_map = cxlds->reg_map;
                 port->reg_map.host = &port->dev;
+               cxlmd->endpoint = port;
         } else if (parent_dport) {
                 rc = dev_set_name(dev, "port%d", port->id);
                 if (rc)
@@ -1149,6 +1185,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
         if (dev_is_pci(dport_dev))
                 dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev));
  
+       cxl_debugfs_create_dport_dir(dport);
+
         return dport;
  }
  
@@ -1374,7 +1412,6 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
  
         get_device(host);
         get_device(&endpoint->dev);
-       cxlmd->endpoint = endpoint;
         cxlmd->depth = endpoint->depth;
         return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
  }
@@ -2096,18 +2133,36 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
  }
  EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
  
-static void combine_coordinates(struct access_coordinate *c1,
-                               struct access_coordinate *c2)
+/**
+ * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
+ *                              and host bridge
+ *
+ * @port: endpoint cxl_port
+ * @coord: output access coordinates
+ *
+ * Return: errno on failure, 0 on success.
+ */
+int cxl_hb_get_perf_coordinates(struct cxl_port *port,
+                               struct access_coordinate *coord)
  {
-               if (c2->write_bandwidth)
-                       c1->write_bandwidth = min(c1->write_bandwidth,
-                                                 c2->write_bandwidth);
-               c1->write_latency += c2->write_latency;
+       struct cxl_port *iter = port;
+       struct cxl_dport *dport;
+
+       if (!is_cxl_endpoint(port))
+               return -EINVAL;
  
-               if (c2->read_bandwidth)
-                       c1->read_bandwidth = min(c1->read_bandwidth,
-                                                c2->read_bandwidth);
-               c1->read_latency += c2->read_latency;
+       dport = iter->parent_dport;
+       while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
+               iter = to_cxl_port(iter->dev.parent);
+               dport = iter->parent_dport;
+       }
+
+       coord[ACCESS_COORDINATE_LOCAL] =
+               dport->hb_coord[ACCESS_COORDINATE_LOCAL];
+       coord[ACCESS_COORDINATE_CPU] =
+               dport->hb_coord[ACCESS_COORDINATE_CPU];
+
+       return 0;
  }
  
  /**
@@ -2143,7 +2198,7 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
          * nothing to gather.
          */
         while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
-               combine_coordinates(&c, &dport->sw_coord);
+               cxl_coordinates_combine(&c, &c, &dport->sw_coord);
                 c.write_latency += dport->link_latency;
                 c.read_latency += dport->link_latency;
  
@@ -2151,9 +2206,6 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                 dport = iter->parent_dport;
         }
  
-       /* Augment with the generic port (host bridge) perf data */
-       combine_coordinates(&c, &dport->hb_coord);
-
         /* Get the calculated PCI paths bandwidth */
         pdev = to_pci_dev(port->uport_dev->parent);
         bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL);
@@ -2221,6 +2273,10 @@ static __init int cxl_core_init(void)
  
         cxl_debugfs = debugfs_create_dir("cxl", NULL);
  
+       if (einj_cxl_is_initialized())
+               debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL,
+                                   &einj_cxl_available_error_type_fops);
+
         cxl_mbox_init();
  
         rc = cxl_memdev_init();
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c

index 4c7fd2d..5c186e0 100644 (file)
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -4,6 +4,7 @@
  #include <linux/genalloc.h>
  #include <linux/device.h>
  #include <linux/module.h>
+#include <linux/memory.h>
  #include <linux/slab.h>
  #include <linux/uuid.h>
  #include <linux/sort.h>
@@ -30,6 +31,108 @@
  
  static struct cxl_region *to_cxl_region(struct device *dev);
  
+/*
+ * Helpers generating the read-only (0444) sysfs attributes for a region's
+ * access coordinates.
+ *
+ * ACCESS_ATTR_RO(level, attrib) defines a show routine named
+ * <attrib>_access<level>_show() that emits cxlr->coord[level].<attrib> as
+ * "%u\n", or returns -ENOENT while that value is zero. It also defines the
+ * matching static struct device_attribute dev_attr_access<level>_<attrib>
+ * through ACCESS_DEVICE_ATTR_RO() / __ACCESS_ATTR_RO().
+ */
+#define __ACCESS_ATTR_RO(_level, _name) {                              \
+       .attr   = { .name = __stringify(_name), .mode = 0444 },         \
+       .show   = _name##_access##_level##_show,                        \
+}
+
+#define ACCESS_DEVICE_ATTR_RO(level, name)     \
+       struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
+
+#define ACCESS_ATTR_RO(level, attrib)                                        \
+static ssize_t attrib##_access##level##_show(struct device *dev,             \
+                                         struct device_attribute *attr,      \
+                                         char *buf)                          \
+{                                                                            \
+       struct cxl_region *cxlr = to_cxl_region(dev);                         \
+                                                                             \
+       if (cxlr->coord[level].attrib == 0)                                   \
+               return -ENOENT;                                               \
+                                                                             \
+       return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib);            \
+}                                                                            \
+static ACCESS_DEVICE_ATTR_RO(level, attrib)
+
+ACCESS_ATTR_RO(0, read_bandwidth);
+ACCESS_ATTR_RO(0, read_latency);
+ACCESS_ATTR_RO(0, write_bandwidth);
+ACCESS_ATTR_RO(0, write_latency);
+
+#define ACCESS_ATTR_DECLARE(level, attrib)     \
+       (&dev_attr_access##level##_##attrib.attr)
+
+static struct attribute *access0_coordinate_attrs[] = {
+       ACCESS_ATTR_DECLARE(0, read_bandwidth),
+       ACCESS_ATTR_DECLARE(0, write_bandwidth),
+       ACCESS_ATTR_DECLARE(0, read_latency),
+       ACCESS_ATTR_DECLARE(0, write_latency),
+       NULL
+};
+
+ACCESS_ATTR_RO(1, read_bandwidth);
+ACCESS_ATTR_RO(1, read_latency);
+ACCESS_ATTR_RO(1, write_bandwidth);
+ACCESS_ATTR_RO(1, write_latency);
+
+static struct attribute *access1_coordinate_attrs[] = {
+       ACCESS_ATTR_DECLARE(1, read_bandwidth),
+       ACCESS_ATTR_DECLARE(1, write_bandwidth),
+       ACCESS_ATTR_DECLARE(1, read_latency),
+       ACCESS_ATTR_DECLARE(1, write_latency),
+       NULL
+};
+
+/*
+ * ACCESS_VISIBLE(level) defines
+ * cxl_region_access<level>_coordinate_visible(), the is_visible() callback
+ * of the "access<level>" attribute group. An attribute is hidden (mode 0)
+ * while the matching member of cxlr->coord[level] is zero; otherwise the
+ * attribute's declared mode is returned.
+ */
+#define ACCESS_VISIBLE(level)                                          \
+static umode_t cxl_region_access##level##_coordinate_visible(          \
+               struct kobject *kobj, struct attribute *a, int n)       \
+{                                                                      \
+       struct device *dev = kobj_to_dev(kobj);                         \
+       struct cxl_region *cxlr = to_cxl_region(dev);                   \
+                                                                       \
+       if (a == &dev_attr_access##level##_read_latency.attr &&         \
+           cxlr->coord[level].read_latency == 0)                       \
+               return 0;                                               \
+                                                                       \
+       if (a == &dev_attr_access##level##_write_latency.attr &&        \
+           cxlr->coord[level].write_latency == 0)                      \
+               return 0;                                               \
+                                                                       \
+       if (a == &dev_attr_access##level##_read_bandwidth.attr &&       \
+           cxlr->coord[level].read_bandwidth == 0)                     \
+               return 0;                                               \
+                                                                       \
+       if (a == &dev_attr_access##level##_write_bandwidth.attr &&      \
+           cxlr->coord[level].write_bandwidth == 0)                    \
+               return 0;                                               \
+                                                                       \
+       return a->mode;                                                 \
+}
+
+ACCESS_VISIBLE(0);
+ACCESS_VISIBLE(1);
+
+/* sysfs group "access0": coordinates stored in cxlr->coord[0] */
+static const struct attribute_group cxl_region_access0_coordinate_group = {
+       .name = "access0",
+       .attrs = access0_coordinate_attrs,
+       .is_visible = cxl_region_access0_coordinate_visible,
+};
+
+/* Accessor returning the "access0" attribute group */
+static const struct attribute_group *get_cxl_region_access0_group(void)
+{
+       return &cxl_region_access0_coordinate_group;
+}
+
+/* sysfs group "access1": coordinates stored in cxlr->coord[1] */
+static const struct attribute_group cxl_region_access1_coordinate_group = {
+       .name = "access1",
+       .attrs = access1_coordinate_attrs,
+       .is_visible = cxl_region_access1_coordinate_visible,
+};
+
+/* Accessor returning the "access1" attribute group */
+static const struct attribute_group *get_cxl_region_access1_group(void)
+{
+       return &cxl_region_access1_coordinate_group;
+}
+
  static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
  {
@@ -1752,6 +1855,8 @@ static int cxl_region_attach(struct cxl_region *cxlr,
                 return -EINVAL;
         }
  
+       cxl_region_perf_data_calculate(cxlr, cxled);
+
         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
                 int i;
  
@@ -2067,6 +2172,8 @@ static const struct attribute_group *region_groups[] = {
         &cxl_base_attribute_group,
         &cxl_region_group,
         &cxl_region_target_group,
+       &cxl_region_access0_coordinate_group,
+       &cxl_region_access1_coordinate_group,
         NULL,
  };
  
@@ -2120,6 +2227,7 @@ static void unregister_region(void *_cxlr)
         struct cxl_region_params *p = &cxlr->params;
         int i;
  
+       unregister_memory_notifier(&cxlr->memory_notifier);
         device_del(&cxlr->dev);
  
         /*
@@ -2164,6 +2272,63 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
         return cxlr;
  }
  
+/*
+ * Publish the region's access coordinates for node @nid and refresh the
+ * region's "access0"/"access1" sysfs groups.
+ *
+ * Only classes with a non-zero read bandwidth are considered. Returns true
+ * when at least one class was published, false otherwise.
+ */
+static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
+{
+       bool published = false;
+
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               struct access_coordinate *coord = &cxlr->coord[i];
+
+               if (!coord->read_bandwidth)
+                       continue;
+
+               if (cxl_need_node_perf_attrs_update(nid)) {
+                       node_set_perf_attrs(nid, coord, i);
+                       published = true;
+                       continue;
+               }
+
+               /* Otherwise go through HMAT; count it only on success */
+               if (cxl_update_hmat_access_coordinates(nid, cxlr, i) == 0)
+                       published = true;
+       }
+
+       if (!published)
+               return false;
+
+       /* A failed group refresh is only logged, it does not fail the update */
+       if (sysfs_update_group(&cxlr->dev.kobj,
+                              get_cxl_region_access0_group()))
+               dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
+
+       if (sysfs_update_group(&cxlr->dev.kobj,
+                              get_cxl_region_access1_group()))
+               dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
+
+       return true;
+}
+
+/*
+ * Memory notifier callback: when memory belonging to the region's target
+ * node goes online, publish the region's access coordinates for that node.
+ *
+ * @nb: the region's embedded ->memory_notifier
+ * @action: memory hotplug action; only MEM_ONLINE is handled
+ * @arg: struct memory_notify describing the event
+ *
+ * Returns NOTIFY_OK when coordinates were published, NOTIFY_DONE otherwise.
+ */
+static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
+                                         unsigned long action, void *arg)
+{
+       struct cxl_region *cxlr = container_of(nb, struct cxl_region,
+                                              memory_notifier);
+       struct cxl_region_params *p = &cxlr->params;
+       struct cxl_endpoint_decoder *cxled = p->targets[0];
+       struct memory_notify *mnb = arg;
+       int nid = mnb->status_change_nid;
+       int region_nid;
+
+       if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
+               return NOTIFY_DONE;
+
+       /*
+        * The notifier is registered when the region is created, so an
+        * event may arrive before any endpoint decoder has been attached.
+        * Without a first target there is no HPA range to resolve.
+        */
+       if (!cxled)
+               return NOTIFY_DONE;
+
+       region_nid = phys_to_target_node(cxled->cxld.hpa_range.start);
+       if (nid != region_nid)
+               return NOTIFY_DONE;
+
+       if (!cxl_region_update_coordinates(cxlr, nid))
+               return NOTIFY_DONE;
+
+       return NOTIFY_OK;
+}
+
  /**
   * devm_cxl_add_region - Adds a region to a decoder
   * @cxlrd: root decoder
@@ -2211,6 +2376,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
         if (rc)
                 goto err;
  
+       cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
+       cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
+       register_memory_notifier(&cxlr->memory_notifier);
+
         rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
         if (rc)
                 return ERR_PTR(rc);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h

index 003feeb..534e25e 100644 (file)
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -6,6 +6,7 @@
  
  #include <linux/libnvdimm.h>
  #include <linux/bitfield.h>
+#include <linux/notifier.h>
  #include <linux/bitops.h>
  #include <linux/log2.h>
  #include <linux/node.h>
@@ -517,6 +518,8 @@ struct cxl_region_params {
   * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge
   * @flags: Region state flags
   * @params: active + config params for the region
+ * @coord: QoS access coordinates for the region
+ * @memory_notifier: notifier for setting the access coordinates to node
   */
  struct cxl_region {
         struct device dev;
@@ -527,6 +530,8 @@ struct cxl_region {
         struct cxl_pmem_region *cxlr_pmem;
         unsigned long flags;
         struct cxl_region_params params;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+       struct notifier_block memory_notifier;
  };
  
  struct cxl_nvdimm_bridge {
@@ -671,7 +676,7 @@ struct cxl_dport {
         struct cxl_port *port;
         struct cxl_regs regs;
         struct access_coordinate sw_coord;
-       struct access_coordinate hb_coord;
+       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
         long link_latency;
  };
  
@@ -879,9 +884,17 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
  
  int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                                       struct access_coordinate *coord);
+int cxl_hb_get_perf_coordinates(struct cxl_port *port,
+                               struct access_coordinate *coord);
+void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
+                                   struct cxl_endpoint_decoder *cxled);
  
  void cxl_memdev_update_perf(struct cxl_memdev *cxlmd);
  
+void cxl_coordinates_combine(struct access_coordinate *out,
+                            struct access_coordinate *c1,
+                            struct access_coordinate *c2);
+
  /*
   * Unit test builds overrides this to __weak, find the 'strong' version
   * of these symbols in tools/testing/cxl/.
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h

index 711b05d..93992a1 100644 (file)
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -71,6 +71,15 @@ enum cxl_regloc_type {
         CXL_REGLOC_RBI_TYPES
  };
  
+/*
+ * Table Access DOE, CDAT Read Entry Response
+ *
+ * Spec refs:
+ *
+ * CXL 3.1 8.1.11, Table 8-14: Read Entry Response
+ * CDAT Specification 1.03: 2 CDAT Data Structures
+ */
+
  struct cdat_header {
         __le32 length;
         u8 revision;
@@ -85,6 +94,21 @@ struct cdat_entry_header {
         __le16 length;
  } __packed;
  
+/*
+ * The DOE CDAT read response contains a CDAT read entry (either the
+ * CDAT header or a structure).
+ */
+union cdat_data {
+       /* View used for the response to entry handle 0 */
+       struct cdat_header header;
+       /* View used for every other entry; carries the entry's length */
+       struct cdat_entry_header entry;
+} __packed;
+
+/* There is an additional CDAT response header of 4 bytes. */
+struct cdat_doe_rsp {
+       /* Table access response header; holds the next entry handle */
+       __le32 doe_header;
+       /* CDAT payload that follows the header (see union cdat_data) */
+       u8 data[];
+} __packed;
+
  /*
   * CXL v3.0 6.2.3 Table 6-4
   * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
diff --git a/include/linux/acpi.h b/include/linux/acpi.h

index a170c38..34829f2 100644 (file)
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1548,4 +1548,25 @@ static inline void acpi_use_parent_companion(struct device *dev)
         ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent));
  }
  
+/*
+ * hmat_update_target_coordinates() is only available with CONFIG_ACPI_HMAT;
+ * without it the inline stub below returns -EOPNOTSUPP.
+ */
+#ifdef CONFIG_ACPI_HMAT
+int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
+                                  enum access_coordinate_class access);
+#else
+static inline int hmat_update_target_coordinates(int nid,
+                                                struct access_coordinate *coord,
+                                                enum access_coordinate_class access)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
+/*
+ * acpi_node_backed_by_real_pxm() is only available with CONFIG_ACPI_NUMA;
+ * without it the inline stub below always returns false.
+ */
+#ifdef CONFIG_ACPI_NUMA
+bool acpi_node_backed_by_real_pxm(int nid);
+#else
+static inline bool acpi_node_backed_by_real_pxm(int nid)
+{
+       return false;
+}
+#endif
+
  #endif /*_LINUX_ACPI_H*/
diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h

new file mode 100644 (file)

index 0000000..624ff6f
--- /dev/null
+++ b/include/linux/einj-cxl.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CXL protocol Error INJection support.
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Ben Cheatham <benjamin.cheatham@amd.com>
+ */
+#ifndef EINJ_CXL_H
+#define EINJ_CXL_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct pci_dev;
+struct seq_file;
+
+/*
+ * EINJ CXL interface. When CONFIG_ACPI_APEI_EINJ_CXL is not enabled the
+ * inline stubs below are used instead: the int-returning ones return
+ * -ENXIO and einj_cxl_is_initialized() returns false.
+ */
+#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL)
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
+int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type);
+bool einj_cxl_is_initialized(void);
+#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */
+static inline int einj_cxl_available_error_type_show(struct seq_file *m,
+                                                    void *v)
+{
+       return -ENXIO;
+}
+
+static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type)
+{
+       return -ENXIO;
+}
+
+static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
+{
+       return -ENXIO;
+}
+
+static inline bool einj_cxl_is_initialized(void) { return false; }
+#endif /* CONFIG_ACPI_APEI_EINJ_CXL */
+
+#endif /* EINJ_CXL_H */
diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h

index 9542186..3ff4c27 100644 (file)
--- a/include/linux/fw_table.h
+++ b/include/linux/fw_table.h
@@ -40,12 +40,14 @@ union acpi_subtable_headers {
  
  int acpi_parse_entries_array(char *id, unsigned long table_size,
                              union fw_table_header *table_header,
+                            unsigned long max_length,
                              struct acpi_subtable_proc *proc,
                              int proc_num, unsigned int max_entries);
  
  int cdat_table_parse(enum acpi_cdat_type type,
                      acpi_tbl_entry_handler_arg handler_arg, void *arg,
-                    struct acpi_table_cdat *table_header);
+                    struct acpi_table_cdat *table_header,
+                    unsigned long length);
  
  /* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */
  #if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS)
diff --git a/include/linux/memory.h b/include/linux/memory.h

index 939a16b..c0afee5 100644 (file)
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -123,6 +123,7 @@ struct mem_section;
  #define DEFAULT_CALLBACK_PRI   0
  #define SLAB_CALLBACK_PRI      1
  #define HMAT_CALLBACK_PRI      2
+#define CXL_CALLBACK_PRI       5
  #define MM_COMPUTE_BATCH_PRI   10
  #define CPUSET_CALLBACK_PRI    10
  #define MEMTIER_HOTPLUG_PRI    100
diff --git a/include/linux/node.h b/include/linux/node.h

index 25b66d7..dfc004e 100644 (file)
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -34,6 +34,18 @@ struct access_coordinate {
         unsigned int write_latency;
  };
  
+/*
+ * ACCESS_COORDINATE_LOCAL correlates to ACCESS CLASS 0
+ *     - access_coordinate between target node and nearest initiator node
+ * ACCESS_COORDINATE_CPU correlates to ACCESS CLASS 1
+ *     - access_coordinate between target node and nearest CPU node
+ */
+enum access_coordinate_class {
+       ACCESS_COORDINATE_LOCAL,
+       ACCESS_COORDINATE_CPU,
+       ACCESS_COORDINATE_MAX
+};
+
  enum cache_indexing {
         NODE_CACHE_DIRECT_MAP,
         NODE_CACHE_INDEXED,
@@ -66,7 +78,7 @@ struct node_cache_attrs {
  #ifdef CONFIG_HMEM_REPORTING
  void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs);
  void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
-                        unsigned access);
+                        enum access_coordinate_class access);
  #else
  static inline void node_add_cache(unsigned int nid,
                                   struct node_cache_attrs *cache_attrs)
@@ -75,7 +87,7 @@ static inline void node_add_cache(unsigned int nid,
  
  static inline void node_set_perf_attrs(unsigned int nid,
                                        struct access_coordinate *coord,
-                                      unsigned access)
+                                      enum access_coordinate_class access)
  {
  }
  #endif
@@ -137,7 +149,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
  
  extern int register_memory_node_under_compute_node(unsigned int mem_nid,
                                                    unsigned int cpu_nid,
-                                                  unsigned access);
+                                                  enum access_coordinate_class access);
  #else
  static inline void node_dev_init(void)
  {
diff --git a/lib/fw_table.c b/lib/fw_table.c

index c3569d2..1629181 100644 (file)
--- a/lib/fw_table.c
+++ b/lib/fw_table.c
@@ -127,6 +127,7 @@ static __init_or_fwtbl_lib int call_handler(struct acpi_subtable_proc *proc,
   *
   * @id: table id (for debugging purposes)
   * @table_size: size of the root table
+ * @max_length: maximum size of the table (ignore if 0)
   * @table_header: where does the table start?
   * @proc: array of acpi_subtable_proc struct containing entry id
   *        and associated handler with it
@@ -148,18 +149,21 @@ static __init_or_fwtbl_lib int call_handler(struct acpi_subtable_proc *proc,
  int __init_or_fwtbl_lib
  acpi_parse_entries_array(char *id, unsigned long table_size,
                          union fw_table_header *table_header,
+                        unsigned long max_length,
                          struct acpi_subtable_proc *proc,
                          int proc_num, unsigned int max_entries)
  {
-       unsigned long table_end, subtable_len, entry_len;
+       unsigned long table_len, table_end, subtable_len, entry_len;
         struct acpi_subtable_entry entry;
         enum acpi_subtable_type type;
         int count = 0;
         int i;
  
         type = acpi_get_subtable_type(id);
-       table_end = (unsigned long)table_header +
-                   acpi_table_get_length(type, table_header);
+       table_len = acpi_table_get_length(type, table_header);
+       if (max_length && max_length < table_len)
+               table_len = max_length;
+       table_end = (unsigned long)table_header + table_len;
  
         /* Parse all entries looking for a match. */
  
@@ -208,7 +212,8 @@ int __init_or_fwtbl_lib
  cdat_table_parse(enum acpi_cdat_type type,
                  acpi_tbl_entry_handler_arg handler_arg,
                  void *arg,
-                struct acpi_table_cdat *table_header)
+                struct acpi_table_cdat *table_header,
+                unsigned long length)
  {
         struct acpi_subtable_proc proc = {
                 .id             = type,
@@ -222,6 +227,6 @@ cdat_table_parse(enum acpi_cdat_type type,
         return acpi_parse_entries_array(ACPI_SIG_CDAT,
                                         sizeof(struct acpi_table_cdat),
                                         (union fw_table_header *)table_header,
-                                       &proc, 1, 0);
+                                       length, &proc, 1, 0);
  }
  EXPORT_SYMBOL_FWTBL_LIB(cdat_table_parse);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 16 Mar 2024 17:04:12 +0000 (10:04 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 16 Mar 2024 17:04:12 +0000 (10:04 -0700)
Documentation/ABI/testing/debugfs-cxl		patch \| blob \| history
Documentation/ABI/testing/sysfs-bus-cxl		patch \| blob \| history
Documentation/firmware-guide/acpi/apei/einj.rst		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
drivers/acpi/apei/Kconfig		patch \| blob \| history
drivers/acpi/apei/Makefile		patch \| blob \| history
drivers/acpi/apei/apei-internal.h		patch \| blob \| history
drivers/acpi/apei/einj-core.c	[new file with mode: 0644]	patch \| blob
drivers/acpi/apei/einj-cxl.c	[new file with mode: 0644]	patch \| blob
drivers/acpi/apei/einj.c	[deleted file]	patch \| blob \| history
drivers/acpi/numa/hmat.c		patch \| blob \| history
drivers/acpi/numa/srat.c		patch \| blob \| history
drivers/acpi/tables.c		patch \| blob \| history
drivers/base/node.c		patch \| blob \| history
drivers/cxl/acpi.c		patch \| blob \| history
drivers/cxl/core/cdat.c		patch \| blob \| history
drivers/cxl/core/core.h		patch \| blob \| history
drivers/cxl/core/pci.c		patch \| blob \| history
drivers/cxl/core/port.c		patch \| blob \| history
drivers/cxl/core/region.c		patch \| blob \| history
drivers/cxl/cxl.h		patch \| blob \| history
drivers/cxl/cxlpci.h		patch \| blob \| history
include/linux/acpi.h		patch \| blob \| history
include/linux/einj-cxl.h	[new file with mode: 0644]	patch \| blob
include/linux/fw_table.h		patch \| blob \| history
include/linux/memory.h		patch \| blob \| history
include/linux/node.h		patch \| blob \| history
lib/fw_table.c		patch \| blob \| history