nvme: allow passthru cmd error logging
author Alan Adamson <alan.adamson@oracle.com>
Tue, 30 Jan 2024 00:19:38 +0000 (16:19 -0800)
committer Keith Busch <kbusch@kernel.org>
Thu, 1 Feb 2024 15:44:53 +0000 (07:44 -0800)
Commit d7ac8dca938c ("nvme: quiet user passthrough command errors")
disabled error logging for user passthrough commands.  This commit
adds the ability to opt-in to passthrough error logging, separately
for Admin commands (per controller) and IO commands (per namespace).

The logging output for passthrough commands (Admin and IO) has been
changed to include CDWXX fields.

nvme0n1: Read(0x2), LBA Out of Range (sct 0x0 / sc 0x80) DNR cdw10=0x0 cdw11=0x1
        cdw12=0x70000 cdw13=0x0 cdw14=0x0 cdw15=0x0

Add a helper function nvme_log_err_passthru() which logs errors for
passthru commands and includes the cdw10-cdw15 values of the nvme
command.

Add a new sysfs attr passthru_err_log_enabled that allows the user to
conditionally enable passthrough error logging for either passthrough Admin
commands sent to the controller or passthrough IO commands sent to a namespace.

By default, passthrough error logging is disabled.

To enable passthrough admin error logging:
        echo 1 > /sys/class/nvme/nvme0/passthru_err_log_enabled

To disable passthrough admin error logging:
        echo 0 > /sys/class/nvme/nvme0/passthru_err_log_enabled

To enable passthrough io error logging:
        echo 1 > /sys/class/nvme/nvme0/nvme0n1/passthru_err_log_enabled

To disable passthrough io error logging:
        echo 0 > /sys/class/nvme/nvme0/nvme0n1/passthru_err_log_enabled

Signed-off-by: Alan Adamson <alan.adamson@oracle.com>
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/sysfs.c

index 8b48b7f..0d124a8 100644 (file)
@@ -338,6 +338,30 @@ static void nvme_log_error(struct request *req)
                           nr->status & NVME_SC_DNR  ? "DNR "  : "");
 }
 
+/*
+ * Log a failed passthrough command together with its cdw10-cdw15 values,
+ * tagged with the namespace disk name or, on the admin queue, the ctrl name.
+ */
+static void nvme_log_err_passthru(struct request *req)
+{
+       struct nvme_ns *ns = req->q->queuedata;
+       struct nvme_request *nr = nvme_req(req);
+
+       pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s"
+               "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n",
+               ns ? ns->disk->disk_name : dev_name(nr->ctrl->device),
+               ns ? nvme_get_opcode_str(nr->cmd->common.opcode) :
+                    nvme_get_admin_opcode_str(nr->cmd->common.opcode),
+               nr->cmd->common.opcode,
+               nvme_get_error_status_str(nr->status),
+               nr->status >> 8 & 7,    /* Status Code Type */
+               nr->status & 0xff,      /* Status Code */
+               nr->status & NVME_SC_MORE ? "MORE " : "",
+               nr->status & NVME_SC_DNR  ? "DNR "  : "",
+               nr->cmd->common.cdw10, nr->cmd->common.cdw11,
+               nr->cmd->common.cdw12, nr->cmd->common.cdw13,
+               nr->cmd->common.cdw14, nr->cmd->common.cdw15);
+}
+
 enum nvme_disposition {
        COMPLETE,
        RETRY,
@@ -385,8 +409,12 @@ static inline void nvme_end_req(struct request *req)
 {
        blk_status_t status = nvme_error_status(nvme_req(req)->status);
 
-       if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET)))
-               nvme_log_error(req);
+       if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
+               if (blk_rq_is_passthrough(req))
+                       nvme_log_err_passthru(req);
+               else
+                       nvme_log_error(req);
+       }
        nvme_end_req_zoned(req);
        nvme_trace_bio_complete(req);
        if (req->cmd_flags & REQ_NVME_MPATH)
@@ -679,10 +707,21 @@ static inline void nvme_clear_nvme_request(struct request *req)
 /* initialize a passthrough request */
 void nvme_init_request(struct request *req, struct nvme_command *cmd)
 {
-       if (req->q->queuedata)
+       struct nvme_request *nr = nvme_req(req);
+       bool logging_enabled;
+
+       if (req->q->queuedata) {
+               struct nvme_ns *ns = req->q->disk->private_data;
+
+               logging_enabled = ns->passthru_err_log_enabled;
                req->timeout = NVME_IO_TIMEOUT;
-       else /* no queuedata implies admin queue */
+       } else { /* no queuedata implies admin queue */
+               logging_enabled = nr->ctrl->passthru_err_log_enabled;
                req->timeout = NVME_ADMIN_TIMEOUT;
+       }
+
+       if (!logging_enabled)
+               req->rq_flags |= RQF_QUIET;
 
        /* passthru commands should let the driver set the SGL flags */
        cmd->common.flags &= ~NVME_CMD_SGL_ALL;
@@ -691,8 +730,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd)
        if (req->mq_hctx->type == HCTX_TYPE_POLL)
                req->cmd_flags |= REQ_POLLED;
        nvme_clear_nvme_request(req);
-       req->rq_flags |= RQF_QUIET;
-       memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
+       memcpy(nr->cmd, cmd, sizeof(*cmd));
 }
 EXPORT_SYMBOL_GPL(nvme_init_request);
 
@@ -3658,6 +3696,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 
        ns->disk = disk;
        ns->queue = disk->queue;
+       ns->passthru_err_log_enabled = false;
 
        if (ctrl->opts && ctrl->opts->data_digest)
                blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);
@@ -3721,6 +3760,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
        nvme_mpath_add_disk(ns, info->anagrpid);
        nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
 
+       /*
+        * Set the disk device's driver_data to ns so that the
+        * nvme_io_passthru_err_log_enabled_{show,store}() sysfs handlers can
+        * reach ns->passthru_err_log_enabled.
+        */
+       dev_set_drvdata(disk_to_dev(ns->disk), ns);
+
        return;
 
  out_cleanup_ns_from_list:
@@ -4521,6 +4567,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        int ret;
 
        WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
+       ctrl->passthru_err_log_enabled = false;
        clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
        spin_lock_init(&ctrl->lock);
        mutex_init(&ctrl->scan_lock);
index b70b333..3897334 100644 (file)
@@ -263,6 +263,7 @@ enum nvme_ctrl_flags {
 struct nvme_ctrl {
        bool comp_seen;
        bool identified;
+       bool passthru_err_log_enabled;
        enum nvme_ctrl_state state;
        spinlock_t lock;
        struct mutex scan_lock;
@@ -522,7 +523,7 @@ struct nvme_ns {
        struct device           cdev_device;
 
        struct nvme_fault_inject fault_inject;
-
+       bool                    passthru_err_log_enabled;
 };
 
 /* NVMe ns supports metadata actions by the controller (generate/strip) */
index 6b2f06f..d099218 100644 (file)
@@ -35,6 +35,62 @@ static ssize_t nvme_sysfs_rescan(struct device *dev,
 }
 static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
 
+static ssize_t nvme_adm_passthru_err_log_enabled_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+       return sysfs_emit(buf,
+                         ctrl->passthru_err_log_enabled ? "on\n" : "off\n");
+}
+
+static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       int err;
+       bool passthru_err_log_enabled;
+
+       err = kstrtobool(buf, &passthru_err_log_enabled);
+       if (err)
+               return -EINVAL;
+
+       ctrl->passthru_err_log_enabled = passthru_err_log_enabled;
+
+       return count;
+}
+
+static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvme_ns *n = dev_get_drvdata(dev);
+
+       return sysfs_emit(buf, n->passthru_err_log_enabled ? "on\n" : "off\n");
+}
+
+static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct nvme_ns *ns = dev_get_drvdata(dev);
+       int err;
+       bool passthru_err_log_enabled;
+
+       err = kstrtobool(buf, &passthru_err_log_enabled);
+       if (err)
+               return -EINVAL;
+       ns->passthru_err_log_enabled = passthru_err_log_enabled;
+
+       return count;
+}
+
+static struct device_attribute dev_attr_adm_passthru_err_log_enabled = \
+       __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \
+       nvme_adm_passthru_err_log_enabled_show, nvme_adm_passthru_err_log_enabled_store);
+
+static struct device_attribute dev_attr_io_passthru_err_log_enabled = \
+       __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \
+       nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store);
+
 static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 {
        struct gendisk *disk = dev_to_disk(dev);
@@ -208,6 +264,7 @@ static struct attribute *nvme_ns_attrs[] = {
        &dev_attr_ana_grpid.attr,
        &dev_attr_ana_state.attr,
 #endif
+       &dev_attr_io_passthru_err_log_enabled.attr,
        NULL,
 };
 
@@ -655,6 +712,7 @@ static struct attribute *nvme_dev_attrs[] = {
 #ifdef CONFIG_NVME_TCP_TLS
        &dev_attr_tls_key.attr,
 #endif
+       &dev_attr_adm_passthru_err_log_enabled.attr,
        NULL
 };