// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/processor.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/err.h>
/*
 * This implements the PCI exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even if they aren't CXL enabled. While this driver is
 * focused around the PCI specific aspects of a CXL device, it binds to the
 * specific CXL memory device class code, and therefore the implementation of
 * cxl_pci is focused around CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register it on the CXL bus.
 *  - Enumerate the device's register interface and map it.
 *  - Register the nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
 */
/*
 * Evaluates to non-zero while the doorbell bit is set in the mailbox control
 * register of @cxlm, i.e. the device has not yet cleared it.
 */
#define cxl_doorbell_busy(cxlm)                                                \
	(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/*
 * CXL 2.0 - 8.2.8.4
 *
 * Despite the _MS suffix this value is in jiffies (2 * HZ is two seconds):
 * the doorbell wait loop adds it directly to a jiffies timestamp.
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
38 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_mem *cxlm)
40 const unsigned long start = jiffies;
41 unsigned long end = start;
43 while (cxl_doorbell_busy(cxlm)) {
46 if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
47 /* Check again in case preempted before timeout test */
48 if (!cxl_doorbell_busy(cxlm))
55 dev_dbg(cxlm->dev, "Doorbell wait took %dms",
56 jiffies_to_msecs(end) - jiffies_to_msecs(start));
60 static void cxl_pci_mbox_timeout(struct cxl_mem *cxlm,
61 struct cxl_mbox_cmd *mbox_cmd)
63 struct device *dev = cxlm->dev;
65 dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
66 mbox_cmd->opcode, mbox_cmd->size_in);
70 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
71 * @cxlm: The CXL memory device to communicate with.
72 * @mbox_cmd: Command to send to the memory device.
74 * Context: Any context. Expects mbox_mutex to be held.
75 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
76 * Caller should check the return code in @mbox_cmd to make sure it
79 * This is a generic form of the CXL mailbox send command thus only using the
80 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
81 * devices, and perhaps other types of CXL devices may have further information
82 * available upon error conditions. Driver facilities wishing to send mailbox
83 * commands should use the wrapper command.
85 * The CXL spec allows for up to two mailboxes. The intention is for the primary
86 * mailbox to be OS controlled and the secondary mailbox to be used by system
87 * firmware. This allows the OS and firmware to communicate with the device and
88 * not need to coordinate with each other. The driver only uses the primary
91 static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
92 struct cxl_mbox_cmd *mbox_cmd)
94 void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
95 struct device *dev = cxlm->dev;
96 u64 cmd_reg, status_reg;
100 lockdep_assert_held(&cxlm->mbox_mutex);
103 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
104 * 1. Caller reads MB Control Register to verify doorbell is clear
105 * 2. Caller writes Command Register
106 * 3. Caller writes Command Payload Registers if input payload is non-empty
107 * 4. Caller writes MB Control Register to set doorbell
108 * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured
109 * 6. Caller reads MB Status Register to fetch Return code
110 * 7. If command successful, Caller reads Command Register to get Payload Length
111 * 8. If output payload is non-empty, host reads Command Payload Registers
113 * Hardware is free to do whatever it wants before the doorbell is rung,
114 * and isn't allowed to change anything after it clears the doorbell. As
115 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
116 * also happen in any order (though some orders might not make sense).
120 if (cxl_doorbell_busy(cxlm)) {
121 dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
125 cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
127 if (mbox_cmd->size_in) {
128 if (WARN_ON(!mbox_cmd->payload_in))
131 cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
133 memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
137 writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
140 dev_dbg(dev, "Sending command\n");
141 writel(CXLDEV_MBOX_CTRL_DOORBELL,
142 cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
145 rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
146 if (rc == -ETIMEDOUT) {
147 cxl_pci_mbox_timeout(cxlm, mbox_cmd);
152 status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
153 mbox_cmd->return_code =
154 FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
156 if (mbox_cmd->return_code != 0) {
157 dev_dbg(dev, "Mailbox operation had an error\n");
162 cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
163 out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
166 if (out_len && mbox_cmd->payload_out) {
168 * Sanitize the copy. If hardware misbehaves, out_len per the
169 * spec can actually be greater than the max allowed size (21
170 * bits available but spec defined 1M max). The caller also may
171 * have requested less data than the hardware supplied even
174 size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);
176 memcpy_fromio(mbox_cmd->payload_out, payload, n);
177 mbox_cmd->size_out = n;
179 mbox_cmd->size_out = 0;
186 * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
187 * @cxlm: The memory device to gain access to.
189 * Context: Any context. Takes the mbox_mutex.
190 * Return: 0 if exclusive access was acquired.
192 static int cxl_pci_mbox_get(struct cxl_mem *cxlm)
194 struct device *dev = cxlm->dev;
198 mutex_lock_io(&cxlm->mbox_mutex);
201 * XXX: There is some amount of ambiguity in the 2.0 version of the spec
202 * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
203 * bit is to allow firmware running on the device to notify the driver
204 * that it's ready to receive commands. It is unclear if the bit needs
205 * to be read for each transaction mailbox, ie. the firmware can switch
206 * it on and off as needed. Second, there is no defined timeout for
207 * mailbox ready, like there is for the doorbell interface.
210 * 1. The firmware might toggle the Mailbox Interface Ready bit, check
211 * it for every command.
213 * 2. If the doorbell is clear, the firmware should have first set the
214 * Mailbox Interface Ready bit. Therefore, waiting for the doorbell
215 * to be ready is sufficient.
217 rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
219 dev_warn(dev, "Mailbox interface not ready\n");
223 md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
224 if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
225 dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
231 * Hardware shouldn't allow a ready status but also have failure bits
232 * set. Spit out an error, this should be a bug report
235 if (md_status & CXLMDEV_DEV_FATAL) {
236 dev_err(dev, "mbox: reported ready, but fatal\n");
239 if (md_status & CXLMDEV_FW_HALT) {
240 dev_err(dev, "mbox: reported ready, but halted\n");
243 if (CXLMDEV_RESET_NEEDED(md_status)) {
244 dev_err(dev, "mbox: reported ready, but reset needed\n");
252 mutex_unlock(&cxlm->mbox_mutex);
257 * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
258 * @cxlm: The CXL memory device to communicate with.
260 * Context: Any context. Expects mbox_mutex to be held.
262 static void cxl_pci_mbox_put(struct cxl_mem *cxlm)
264 mutex_unlock(&cxlm->mbox_mutex);
/*
 * cxl_pci_mbox_send() - Send one mailbox command with the mailbox held
 * @cxlm: The CXL memory device to communicate with.
 * @cmd: Command to execute; its result fields are filled in by the send.
 *
 * Acquires the mailbox, executes @cmd and releases the mailbox again.
 *
 * Return: the error from acquiring the mailbox if that fails, otherwise the
 *         result of __cxl_pci_mbox_send_cmd().
 */
static int cxl_pci_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
{
	int rc;

	rc = cxl_pci_mbox_get(cxlm);
	if (rc)
		return rc;

	rc = __cxl_pci_mbox_send_cmd(cxlm, cmd);
	cxl_pci_mbox_put(cxlm);

	return rc;
}
281 static int cxl_pci_setup_mailbox(struct cxl_mem *cxlm)
283 const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
285 cxlm->mbox_send = cxl_pci_mbox_send;
287 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
290 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
292 * If the size is too small, mandatory commands will not work and so
293 * there's no point in going forward. If the size is too large, there's
294 * no harm is soft limiting it.
296 cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
297 if (cxlm->payload_size < 256) {
298 dev_err(cxlm->dev, "Mailbox is too small (%zub)",
303 dev_dbg(cxlm->dev, "Mailbox payload sized %zu",
309 static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
312 int bar = map->barno;
313 struct device *dev = &pdev->dev;
314 resource_size_t offset = map->block_offset;
316 /* Basic sanity check that BAR is big enough */
317 if (pci_resource_len(pdev, bar) < offset) {
318 dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
319 &pdev->resource[bar], &offset);
323 addr = pci_iomap(pdev, bar, 0);
325 dev_err(dev, "failed to map registers\n");
329 dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
332 map->base = addr + map->block_offset;
336 static void cxl_unmap_regblock(struct pci_dev *pdev,
337 struct cxl_register_map *map)
339 pci_iounmap(pdev, map->base - map->block_offset);
343 static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
345 struct cxl_component_reg_map *comp_map;
346 struct cxl_device_reg_map *dev_map;
347 struct device *dev = &pdev->dev;
348 void __iomem *base = map->base;
350 switch (map->reg_type) {
351 case CXL_REGLOC_RBI_COMPONENT:
352 comp_map = &map->component_map;
353 cxl_probe_component_regs(dev, base, comp_map);
354 if (!comp_map->hdm_decoder.valid) {
355 dev_err(dev, "HDM decoder registers not found\n");
359 dev_dbg(dev, "Set up component registers\n");
361 case CXL_REGLOC_RBI_MEMDEV:
362 dev_map = &map->device_map;
363 cxl_probe_device_regs(dev, base, dev_map);
364 if (!dev_map->status.valid || !dev_map->mbox.valid ||
365 !dev_map->memdev.valid) {
366 dev_err(dev, "registers not found: %s%s%s\n",
367 !dev_map->status.valid ? "status " : "",
368 !dev_map->mbox.valid ? "mbox " : "",
369 !dev_map->memdev.valid ? "memdev " : "");
373 dev_dbg(dev, "Probing device registers...\n");
382 static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
384 struct device *dev = cxlm->dev;
385 struct pci_dev *pdev = to_pci_dev(dev);
387 switch (map->reg_type) {
388 case CXL_REGLOC_RBI_COMPONENT:
389 cxl_map_component_regs(pdev, &cxlm->regs.component, map);
390 dev_dbg(dev, "Mapping component registers...\n");
392 case CXL_REGLOC_RBI_MEMDEV:
393 cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map);
394 dev_dbg(dev, "Probing device registers...\n");
403 static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
404 struct cxl_register_map *map)
407 ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
408 map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
409 map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
413 * cxl_find_regblock() - Locate register blocks by type
414 * @pdev: The CXL PCI device to enumerate.
415 * @type: Register Block Indicator id
416 * @map: Enumeration output, clobbered on error
418 * Return: 0 if register block enumerated, negative error code otherwise
420 * A CXL DVSEC may point to one or more register blocks, search for them
423 static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
424 struct cxl_register_map *map)
426 u32 regloc_size, regblocks;
429 regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
430 PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
434 pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size);
435 regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
437 regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
438 regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
440 for (i = 0; i < regblocks; i++, regloc += 8) {
443 pci_read_config_dword(pdev, regloc, ®_lo);
444 pci_read_config_dword(pdev, regloc + 4, ®_hi);
446 cxl_decode_regblock(reg_lo, reg_hi, map);
448 if (map->reg_type == type)
455 static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
456 struct cxl_register_map *map)
460 rc = cxl_find_regblock(pdev, type, map);
464 rc = cxl_map_regblock(pdev, map);
468 rc = cxl_probe_regs(pdev, map);
469 cxl_unmap_regblock(pdev, map);
474 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
476 struct cxl_register_map map;
477 struct cxl_memdev *cxlmd;
478 struct cxl_mem *cxlm;
482 * Double check the anonymous union trickery in struct cxl_regs
483 * FIXME switch to struct_group()
485 BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
486 offsetof(struct cxl_regs, device_regs.memdev));
488 rc = pcim_enable_device(pdev);
492 cxlm = cxl_mem_create(&pdev->dev);
494 return PTR_ERR(cxlm);
496 rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
500 rc = cxl_map_regs(cxlm, &map);
504 rc = cxl_pci_setup_mailbox(cxlm);
508 rc = cxl_mem_enumerate_cmds(cxlm);
512 rc = cxl_mem_identify(cxlm);
516 rc = cxl_mem_create_range_info(cxlm);
520 cxlmd = devm_cxl_add_memdev(cxlm);
522 return PTR_ERR(cxlmd);
524 if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
525 rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
530 static const struct pci_device_id cxl_mem_pci_tbl[] = {
531 /* PCI class code for CXL.mem Type-3 Devices */
532 { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
533 { /* terminate list */ },
535 MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
537 static struct pci_driver cxl_pci_driver = {
538 .name = KBUILD_MODNAME,
539 .id_table = cxl_mem_pci_tbl,
540 .probe = cxl_pci_probe,
542 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
546 MODULE_LICENSE("GPL v2");
547 module_pci_driver(cxl_pci_driver);
548 MODULE_IMPORT_NS(CXL);