1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
3 #include <linux/io-64-nonatomic-lo-hi.h>
4 #include <linux/security.h>
5 #include <linux/debugfs.h>
6 #include <linux/mutex.h>
/*
 * File-scope switch read by cxl_mem_raw_command_allowed(): when true, the
 * per-opcode RAW restrictions checked after it are bypassed.
 * NOTE(review): presumably this is the variable backing the "raw_allow_all"
 * debugfs bool created in cxl_mbox_init() -- confirm against the full file.
 */
12 static bool cxl_raw_allow_all;
17 * Core implementation of the CXL 2.0 Type-3 Memory Device Mailbox. The
18 * implementation is used by the cxl_pci driver to initialize the device
19 * and implement the cxl_mem.h IOCTL UAPI. It also implements the
20 * backend of the cxl_pmem_ctl() transport for LIBNVDIMM.
/*
 * Iterate @cmd over every slot of cxl_mem_commands[], starting at index 0 and
 * stopping once the pointer offset reaches ARRAY_SIZE(cxl_mem_commands).
 * @cmd is evaluated several times, so pass a plain pointer variable.
 */
23 #define cxl_for_each_cmd(cmd) \
24 for ((cmd) = &cxl_mem_commands[0]; \
25 ((cmd) - cxl_mem_commands) < ARRAY_SIZE(cxl_mem_commands); (cmd)++)
/*
 * Emit one designated-initializer entry of the command table. The entry is
 * placed at index CXL_MEM_COMMAND_ID_<_id>, records that same value as its id
 * and pairs it with the hardware opcode CXL_MBOX_OP_<_id>.
 * NOTE(review): the uses of sin, sout and _flags are not visible in this view;
 * presumably they fill the size_in, size_out and flags members -- confirm.
 */
27 #define CXL_CMD(_id, sin, sout, _flags) \
28 [CXL_MEM_COMMAND_ID_##_id] = { \
30 .id = CXL_MEM_COMMAND_ID_##_id, \
34 .opcode = CXL_MBOX_OP_##_id, \
39 * This table defines the supported mailbox commands for the driver. This table
40 * is made up of a UAPI structure. Non-negative values as parameters in the
41 * table will be validated against the user's input. For example, if size_in is
42 * 0, and the user passed in 1, it is an error.
/*
 * Driver command table, indexed by CXL_MEM_COMMAND_ID_*. Each CXL_CMD() row
 * names a command followed by its input size, output size and flags.
 * A size of ~0 marks a variable-length payload: the user-input validation only
 * enforces sizes that are >= 0. Rows carrying CXL_CMD_FLAG_FORCE_ENABLE are
 * additionally switched on by cxl_enumerate_cmds() after the CEL walk.
 * The RAW row only exists when CONFIG_CXL_MEM_RAW_COMMANDS is defined.
 */
44 static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
45 CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
46 #ifdef CONFIG_CXL_MEM_RAW_COMMANDS
47 CXL_CMD(RAW, ~0, ~0, 0),
49 CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
50 CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
51 CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
52 CXL_CMD(GET_LSA, 0x8, ~0, 0),
53 CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
54 CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
55 CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
56 CXL_CMD(SET_LSA, ~0, 0, 0),
57 CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
58 CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
59 CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
60 CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
61 CXL_CMD(GET_POISON, 0x10, ~0, 0),
62 CXL_CMD(INJECT_POISON, 0x8, 0, 0),
63 CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
64 CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
65 CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
66 CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
70 * Commands that RAW doesn't permit. The rationale for each:
72 * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
73 * coordination of transaction timeout values at the root bridge level.
75 * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
76 * and needs to be coordinated with HDM updates.
78 * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
79 * driver and any writes from userspace invalidates those contents.
81 * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
82 * to the device after it is marked clean, userspace can not make that
85 * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
86 * is kept up to date with patrol notifications and error management.
/*
 * Individual opcodes that cxl_mem_raw_command_allowed() compares against the
 * opcode of a RAW request.
 * NOTE(review): the rationale comment preceding this table also covers
 * CXL_MBOX_OP_SET_LSA, which is not among the entries visible here -- confirm
 * it is present in the full table.
 */
88 static u16 cxl_disabled_raw_commands[] = {
89 CXL_MBOX_OP_ACTIVATE_FW,
90 CXL_MBOX_OP_SET_PARTITION_INFO,
92 CXL_MBOX_OP_SET_SHUTDOWN_STATE,
93 CXL_MBOX_OP_SCAN_MEDIA,
94 CXL_MBOX_OP_GET_SCAN_MEDIA,
98 * Command sets that RAW doesn't permit. All opcodes in this set are
99 * disabled because they pass plain text security payloads over the
100 * user/kernel boundary. This functionality is intended to be wrapped
101 * behind the keys ABI which allows for encrypted payloads in the UAPI
/*
 * Command-set identifiers treated as security related. Each value is matched
 * against the upper byte of a 16-bit opcode (opcode >> 8) by
 * cxl_is_security_command().
 */
103 static u8 security_command_sets[] = {
105 0x45, /* Persistent Memory Data-at-rest Security */
106 0x46, /* Security Passthrough */
/*
 * Report whether @opcode belongs to one of the security command sets, by
 * comparing its upper byte with every entry of security_command_sets[].
 */
109 static bool cxl_is_security_command(u16 opcode)
113 for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
114 if (security_command_sets[i] == (opcode >> 8))
/*
 * Look up the command-table entry whose hardware opcode equals @opcode.
 * NOTE(review): the value returned when nothing matches is not visible in
 * this view (presumably NULL) -- callers should be checked accordingly.
 */
119 static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
121 struct cxl_mem_command *c;
124 if (c->opcode == opcode)
131 * cxl_mbox_send_cmd() - Send a mailbox command to a device.
132 * @cxlds: The device data for the operation
133 * @opcode: Opcode for the mailbox command.
134 * @in: The input payload for the mailbox command.
135 * @in_size: The length of the input payload
136 * @out: Caller allocated buffer for the output.
137 * @out_size: Expected size of output.
139 * Context: Any context. Will acquire and release mbox_mutex.
141 * * %>=0 - Number of bytes returned in @out.
142 * * %-E2BIG - Payload is too large for hardware.
143 * * %-EBUSY - Couldn't acquire exclusive mailbox access.
144 * * %-EFAULT - Hardware error occurred.
145 * * %-ENXIO - Command completed, but device reported an error.
146 * * %-EIO - Unexpected output size.
148 * A mailbox command may complete successfully at the transport level while the
149 * device itself reports an error. While this distinction can be useful for
150 * commands from userspace, the kernel can only use results when both succeed.
/*
 * In-kernel entry point for one mailbox transaction: wraps the arguments in a
 * struct cxl_mbox_cmd, submits it through cxlds->mbox_send(), then checks the
 * device return code and, for fixed-size commands, the returned length.
 */
152 int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
153 size_t in_size, void *out, size_t out_size)
/*
 * Table entry for @opcode; only its info.size_out is consulted below.
 * NOTE(review): cmd is dereferenced without a visible NULL check -- confirm
 * cxl_mem_find_command() cannot miss for opcodes used by in-kernel callers.
 */
155 const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
156 struct cxl_mbox_cmd mbox_cmd = {
160 .size_out = out_size,
/* Refuse output requests larger than the device's mailbox payload area. */
165 if (out_size > cxlds->payload_size)
168 rc = cxlds->mbox_send(cxlds, &mbox_cmd);
172 /* TODO: Map return code to proper kernel style errno */
173 if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
177 * Variable sized commands can't be validated and so it's up to the
178 * caller to do that if they wish.
/* size_out >= 0 means fixed-size output: the device must return exactly @out_size. */
180 if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
185 EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL);
/*
 * Decide whether userspace may issue @opcode through the RAW command.
 * The checks run in this order: RAW support compiled in, kernel lockdown
 * (LOCKDOWN_PCI_ACCESS), the cxl_raw_allow_all override, membership in a
 * security command set, and finally the cxl_disabled_raw_commands[] list.
 */
187 static bool cxl_mem_raw_command_allowed(u16 opcode)
191 if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
194 if (security_locked_down(LOCKDOWN_PCI_ACCESS))
/* The override is tested before the per-opcode filters below. */
197 if (cxl_raw_allow_all)
200 if (cxl_is_security_command(opcode))
203 for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
204 if (cxl_disabled_raw_commands[i] == opcode)
211 * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
212 * @cxlds: The device data for the operation
213 * @send_cmd: &struct cxl_send_command copied in from userspace.
214 * @out_cmd: Sanitized and populated &struct cxl_mem_command.
217 * * %0 - @out_cmd is ready to send.
218 * * %-ENOTTY - Invalid command specified.
219 * * %-EINVAL - Reserved fields or invalid values were used.
220 * * %-ENOMEM - Input or output buffer wasn't sized properly.
221 * * %-EPERM - Attempted to use a protected command.
222 * * %-EBUSY - Kernel has claimed exclusive access to this opcode
224 * The result of this command is a fully validated command in @out_cmd that is
225 * safe to send to the hardware.
227 * See handle_mailbox_cmd_from_user()
/*
 * Validate a userspace struct cxl_send_command and fill @out_cmd with the
 * internal command description. RAW requests take a separate path: @out_cmd
 * is built from the user-supplied opcode and sizes. Every other id is looked
 * up in cxl_mem_commands[] and checked against the enabled/exclusive bitmaps
 * and the table's expected payload sizes.
 */
229 static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds,
230 const struct cxl_send_command *send_cmd,
231 struct cxl_mem_command *out_cmd)
233 const struct cxl_command_info *info;
234 struct cxl_mem_command *c;
/* Id 0 and anything at or beyond CXL_MEM_COMMAND_ID_MAX are rejected up front. */
236 if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
240 * The user can never specify an input payload larger than what hardware
241 * supports, but output can be arbitrarily large (simply write out as
242 * much data as the hardware provides).
244 if (send_cmd->in.size > cxlds->payload_size)
248 * Checks are bypassed for raw commands but a WARN/taint will occur
249 * later in the callchain
251 if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
/* RAW has no usable table sizes, so the user's sizes and opcode are taken as given. */
252 const struct cxl_mem_command temp = {
254 .id = CXL_MEM_COMMAND_ID_RAW,
256 .size_in = send_cmd->in.size,
257 .size_out = send_cmd->out.size,
259 .opcode = send_cmd->raw.opcode
262 if (send_cmd->raw.rsvd)
266 * Unlike supported commands, the output size of RAW commands
267 * gets passed along without further checking, so it must be
270 if (send_cmd->out.size > cxlds->payload_size)
273 if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
276 memcpy(out_cmd, &temp, sizeof(temp));
/* Non-RAW path: flags outside CXL_MEM_COMMAND_FLAG_MASK and reserved fields must be clear. */
281 if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
287 if (send_cmd->in.rsvd || send_cmd->out.rsvd)
290 /* Convert user's command into the internal representation */
291 c = &cxl_mem_commands[send_cmd->id];
294 /* Check that the command is enabled for hardware */
295 if (!test_bit(info->id, cxlds->enabled_cmds))
298 /* Check that the command is not claimed for exclusive kernel use */
299 if (test_bit(info->id, cxlds->exclusive_cmds))
302 /* Check the input buffer is the expected size */
303 if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
306 /* Check the output buffer is at least large enough */
307 if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
/* Start from the table entry, then record the input size the user supplied. */
310 memcpy(out_cmd, c, sizeof(*c));
311 out_cmd->info.size_in = send_cmd->in.size;
313 * XXX: out_cmd->info.size_out will be controlled by the driver, and the
314 * specified number of bytes @send_cmd->out.size will be copied back out
/*
 * Query ioctl backend: reads q->n_commands from userspace and either reports
 * the table size back through the same field, or copies cxl_command_info
 * records from cxl_mem_commands[] into q->commands[].
 */
321 int cxl_query_cmd(struct cxl_memdev *cxlmd,
322 struct cxl_mem_query_commands __user *q)
324 struct device *dev = &cxlmd->dev;
325 struct cxl_mem_command *cmd;
329 dev_dbg(dev, "Query IOCTL\n");
331 if (get_user(n_commands, &q->n_commands))
334 /* returns the total number if 0 elements are requested. */
336 return put_user(ARRAY_SIZE(cxl_mem_commands), &q->n_commands);
/*
 * NOTE(review): the "max" in the comment below reads like it should be "min",
 * since userspace supplies the capacity of its array -- confirm against the
 * loop's termination condition, which is not visible in this view.
 */
339 * otherwise, return max(n_commands, total commands) cxl_command_info
342 cxl_for_each_cmd(cmd) {
343 const struct cxl_command_info *info = &cmd->info;
/* j is post-incremented, so it counts the records handed to userspace so far. */
345 if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
356 * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
357 * @cxlds: The device data for the operation
358 * @cmd: The validated command.
359 * @in_payload: Pointer to userspace's input payload.
360 * @out_payload: Pointer to userspace's output payload.
361 * @size_out: (Input) Max payload size to copy out.
362 * (Output) Payload size hardware generated.
363 * @retval: Hardware generated return code from the operation.
366 * * %0 - Mailbox transaction succeeded. This implies the mailbox
367 * protocol completed successfully not that the operation itself
369 * * %-ENOMEM - Couldn't allocate a bounce buffer.
370 * * %-EFAULT - Something happened with copy_to/from_user.
371 * * %-EINTR - Mailbox acquisition interrupted.
372 * * %-EXXX - Transaction level failures.
374 * Creates the appropriate mailbox command and dispatches it on behalf of a
375 * userspace request. The input and output payloads are copied between
378 * See cxl_send_cmd().
/*
 * Run one validated command on behalf of userspace: allocate kernel bounce
 * buffers, copy the input payload in, submit via cxlds->mbox_send(), copy the
 * output payload back out and report the resulting size and device return
 * code through @size_out and @retval. Both bounce buffers are released with
 * kvfree() at the end.
 */
380 static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
381 const struct cxl_mem_command *cmd,
382 u64 in_payload, u64 out_payload,
383 s32 *size_out, u32 *retval)
385 struct device *dev = cxlds->dev;
386 struct cxl_mbox_cmd mbox_cmd = {
387 .opcode = cmd->opcode,
388 .size_in = cmd->info.size_in,
389 .size_out = cmd->info.size_out,
/* Zeroed output bounce buffer, sized by the command's size_out. */
393 if (cmd->info.size_out) {
394 mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
395 if (!mbox_cmd.payload_out)
/* Kernel copy of the user's input; the output buffer is freed if this fails. */
399 if (cmd->info.size_in) {
400 mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
402 if (IS_ERR(mbox_cmd.payload_in)) {
403 kvfree(mbox_cmd.payload_out);
404 return PTR_ERR(mbox_cmd.payload_in);
409 "Submitting %s command for user\n"
412 cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
/* Any use of the RAW command triggers a one-time device warning. */
415 dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
416 "raw command path used\n");
418 rc = cxlds->mbox_send(cxlds, &mbox_cmd);
423 * @size_out contains the max size that's allowed to be written back out
424 * to userspace. While the payload may have written more output than
425 * this it will have to be ignored.
/*
 * NOTE(review): the warning below fires when the device reports more output
 * than *size_out, but the copy length is still mbox_cmd.size_out, not a
 * clamped value -- confirm the transport bounds size_out, or clamp here.
 */
427 if (mbox_cmd.size_out) {
428 dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
429 "Invalid return size\n");
430 if (copy_to_user(u64_to_user_ptr(out_payload),
431 mbox_cmd.payload_out, mbox_cmd.size_out)) {
/* Hand the actual output length and the device's return code back to the caller. */
437 *size_out = mbox_cmd.size_out;
438 *retval = mbox_cmd.return_code;
441 kvfree(mbox_cmd.payload_in);
442 kvfree(mbox_cmd.payload_out);
/*
 * Send ioctl backend: copies the struct cxl_send_command in from userspace,
 * validates it, dispatches it through handle_mailbox_cmd_from_user() and
 * copies the (updated) structure back to userspace.
 */
446 int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
448 struct cxl_dev_state *cxlds = cxlmd->cxlds;
449 struct device *dev = &cxlmd->dev;
450 struct cxl_send_command send;
451 struct cxl_mem_command c;
454 dev_dbg(dev, "Send IOCTL\n");
456 if (copy_from_user(&send, s, sizeof(send)))
459 rc = cxl_validate_cmd_from_user(cxlmd->cxlds, &send, &c);
463 /* Prepare to handle a full payload for variable sized output */
464 if (c.info.size_out < 0)
465 c.info.size_out = cxlds->payload_size;
/* send.out.size is passed by address so the handler can report the real length. */
467 rc = handle_mailbox_cmd_from_user(cxlds, &c, send.in.payload,
468 send.out.payload, &send.out.size,
473 if (copy_to_user(s, &send, sizeof(send)))
/*
 * Read @size bytes of the log identified by @uuid into @out using one or more
 * GET_LOG mailbox commands. Each transfer is capped at cxlds->payload_size
 * and the remaining byte count is reduced by the amount just requested.
 */
479 static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 *out)
481 u32 remaining = size;
/* Request no more than one mailbox payload per command. */
485 u32 xfer_size = min_t(u32, remaining, cxlds->payload_size);
486 struct cxl_mbox_get_log log = {
488 .offset = cpu_to_le32(offset),
489 .length = cpu_to_le32(xfer_size)
493 rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LOG, &log, sizeof(log),
499 remaining -= xfer_size;
507 * cxl_walk_cel() - Walk through the Command Effects Log.
508 * @cxlds: The device data for the operation
509 * @size: Length of the Command Effects Log.
512 * Iterate over each entry in the CEL and determine if the driver supports the
513 * command. If so, the command is enabled for the device and can be used later.
/*
 * Interpret @cel as an array of struct cxl_cel_entry and, for each entry whose
 * opcode is found in the driver's command table, set the corresponding bit in
 * cxlds->enabled_cmds. Trailing bytes that do not fill a whole entry are
 * ignored because the entry count is size / sizeof(entry).
 */
515 static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
517 struct cxl_cel_entry *cel_entry;
518 const int cel_entries = size / sizeof(*cel_entry);
521 cel_entry = (struct cxl_cel_entry *) cel;
523 for (i = 0; i < cel_entries; i++) {
/* Opcodes are stored little-endian in the log. */
524 u16 opcode = le16_to_cpu(cel_entry[i].opcode);
525 struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
529 "Opcode 0x%04x unsupported by driver", opcode);
533 set_bit(cmd->info.id, cxlds->enabled_cmds);
/*
 * Allocate a buffer of cxlds->payload_size bytes and fill it with the output
 * of the GET_SUPPORTED_LOGS mailbox command. Allocation failure is reported
 * as ERR_PTR(-ENOMEM).
 * NOTE(review): handling of a failed mailbox command is not visible in this
 * view; presumably the buffer is freed and an ERR_PTR is returned. The caller
 * owns the returned buffer on success -- confirm it is kvfree()d.
 */
537 static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds)
539 struct cxl_mbox_get_supported_logs *ret;
542 ret = kvmalloc(cxlds->payload_size, GFP_KERNEL);
544 return ERR_PTR(-ENOMEM);
546 rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL, 0, ret,
547 cxlds->payload_size);
561 /* See CXL 2.0 Table 170. Get Log Input Payload */
/*
 * UUIDs of the device logs known to this file, indexed by CEL_UUID and
 * VENDOR_DEBUG_UUID. cxl_enumerate_cmds() compares against the CEL entry.
 */
562 static const uuid_t log_uuid[] = {
563 [CEL_UUID] = DEFINE_CXL_CEL_UUID,
564 [VENDOR_DEBUG_UUID] = DEFINE_CXL_VENDOR_DEBUG_UUID,
568 * cxl_enumerate_cmds() - Enumerate commands for a device.
569 * @cxlds: The device data for the operation
571 * Returns 0 if enumerate completed successfully.
573 * CXL devices have optional support for certain commands. This function will
574 * determine the set of supported commands for the hardware and update the
575 * enabled_cmds bitmap in the @cxlds.
/*
 * Discover which commands the device supports: fetch the supported-logs list,
 * locate the Command Effects Log by UUID, read it with cxl_xfer_log(), and
 * let cxl_walk_cel() set the matching bits in cxlds->enabled_cmds. Commands
 * flagged CXL_CMD_FLAG_FORCE_ENABLE are then enabled as well.
 */
577 int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
579 struct cxl_mbox_get_supported_logs *gsl;
580 struct device *dev = cxlds->dev;
581 struct cxl_mem_command *cmd;
584 gsl = cxl_get_gsl(cxlds);
/* Entry count and per-entry size are little-endian in the device payload. */
589 for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
590 u32 size = le32_to_cpu(gsl->entry[i].size);
591 uuid_t uuid = gsl->entry[i].uuid;
/* NOTE(review): size is a u32 but is printed with %d; %u would match the type. */
594 dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
/* Only the CEL is of interest; other log types are not processed here. */
596 if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
/* The allocation size comes straight from the device-reported log size. */
599 log = kvmalloc(size, GFP_KERNEL);
605 rc = cxl_xfer_log(cxlds, &uuid, size, log);
611 cxl_walk_cel(cxlds, size, log);
614 /* In case CEL was bogus, enable some default commands. */
615 cxl_for_each_cmd(cmd)
616 if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
617 set_bit(cmd->info.id, cxlds->enabled_cmds);
619 /* Found the required CEL */
627 EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
630 * cxl_mem_get_partition_info - Get partition info
631 * @cxlds: The device data for the operation
633 * Retrieve the current partition info for the device specified. The active
634 * values are the current capacity in bytes. If not 0, the 'next' values are
635 * the pending values, in bytes, which take effect on next cold reset.
637 * Return: 0 on success, otherwise the error returned by the mailbox command.
639 * See CXL @8.2.9.5.2.1 Get Partition Info
641 static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
643 struct cxl_mbox_get_partition_info {
644 __le64 active_volatile_cap;
645 __le64 active_persistent_cap;
646 __le64 next_volatile_cap;
647 __le64 next_persistent_cap;
651 rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0,
657 cxlds->active_volatile_bytes =
658 le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
659 cxlds->active_persistent_bytes =
660 le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
661 cxlds->next_volatile_bytes =
662 le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
663 cxlds->next_persistent_bytes =
664 le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
670 * cxl_dev_state_identify() - Send the IDENTIFY command to the device.
671 * @cxlds: The device data for the operation
673 * Return: 0 if identify was executed successfully.
675 * This will dispatch the identify command to the device and on success populate
676 * structures to be exported to sysfs.
/*
 * Issue the IDENTIFY mailbox command and cache its results in @cxlds: the
 * capacity fields (each converted from little-endian and multiplied by
 * CXL_CAPACITY_MULTIPLIER), the LSA size and the firmware revision string.
 */
678 int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
680 /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
681 struct cxl_mbox_identify id;
/* No input payload; @id receives the device's identify output. */
684 rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
690 le64_to_cpu(id.total_capacity) * CXL_CAPACITY_MULTIPLIER;
691 cxlds->volatile_only_bytes =
692 le64_to_cpu(id.volatile_capacity) * CXL_CAPACITY_MULTIPLIER;
693 cxlds->persistent_only_bytes =
694 le64_to_cpu(id.persistent_capacity) * CXL_CAPACITY_MULTIPLIER;
695 cxlds->partition_align_bytes =
696 le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
699 "Identify Memory Device\n"
700 " total_bytes = %#llx\n"
701 " volatile_only_bytes = %#llx\n"
702 " persistent_only_bytes = %#llx\n"
703 " partition_align_bytes = %#llx\n",
704 cxlds->total_bytes, cxlds->volatile_only_bytes,
705 cxlds->persistent_only_bytes, cxlds->partition_align_bytes);
707 cxlds->lsa_size = le32_to_cpu(id.lsa_size);
/* The copy length is the size of the source field, id.fw_revision. */
708 memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
712 EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
/*
 * Fill in cxlds->ram_range and cxlds->pmem_range. When partition_align_bytes
 * is 0 the ranges are derived from the volatile-only and persistent-only
 * sizes; otherwise the partition info is queried from the device and the
 * active volatile/persistent sizes are used. In both cases the volatile range
 * starts at 0 and the persistent range begins where the volatile range ends.
 */
714 int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
718 if (cxlds->partition_align_bytes == 0) {
719 cxlds->ram_range.start = 0;
720 cxlds->ram_range.end = cxlds->volatile_only_bytes - 1;
721 cxlds->pmem_range.start = cxlds->volatile_only_bytes;
722 cxlds->pmem_range.end = cxlds->volatile_only_bytes +
723 cxlds->persistent_only_bytes - 1;
/* Non-zero partition alignment: ask the device for its partition info. */
727 rc = cxl_mem_get_partition_info(cxlds);
729 dev_err(cxlds->dev, "Failed to query partition information\n");
734 "Get Partition Info\n"
735 " active_volatile_bytes = %#llx\n"
736 " active_persistent_bytes = %#llx\n"
737 " next_volatile_bytes = %#llx\n"
738 " next_persistent_bytes = %#llx\n",
739 cxlds->active_volatile_bytes, cxlds->active_persistent_bytes,
740 cxlds->next_volatile_bytes, cxlds->next_persistent_bytes);
/* Range ends are inclusive, hence the "- 1". */
742 cxlds->ram_range.start = 0;
743 cxlds->ram_range.end = cxlds->active_volatile_bytes - 1;
745 cxlds->pmem_range.start = cxlds->active_volatile_bytes;
746 cxlds->pmem_range.end =
747 cxlds->active_volatile_bytes + cxlds->active_persistent_bytes - 1;
751 EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
/*
 * Allocate a zeroed struct cxl_dev_state with devm_kzalloc() against @dev and
 * initialize its mailbox mutex. Allocation failure is logged and reported as
 * ERR_PTR(-ENOMEM).
 */
753 struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
755 struct cxl_dev_state *cxlds;
757 cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL);
759 dev_err(dev, "No memory available\n");
760 return ERR_PTR(-ENOMEM);
763 mutex_init(&cxlds->mbox_mutex);
768 EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL);
/* Top-level "cxl" debugfs directory; kept so cxl_mbox_exit() can remove it. */
770 static struct dentry *cxl_debugfs;
/*
 * Create the debugfs hierarchy: a "cxl" directory at the debugfs root, an
 * "mbox" directory beneath it, and a 0600 boolean file "raw_allow_all" inside
 * "mbox".
 */
772 void __init cxl_mbox_init(void)
774 struct dentry *mbox_debugfs;
776 cxl_debugfs = debugfs_create_dir("cxl", NULL);
777 mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
778 debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
/* Tear down the "cxl" debugfs directory and everything created beneath it. */
782 void cxl_mbox_exit(void)
784 debugfs_remove_recursive(cxl_debugfs);