nvme: add 'iopolicy' module parameter
author Hannes Reinecke <hare@suse.de>
Mon, 20 Dec 2021 12:51:45 +0000 (13:51 +0100)
committer Christoph Hellwig <hch@lst.de>
Thu, 23 Dec 2021 10:22:46 +0000 (11:22 +0100)
While the 'iopolicy' sysfs attribute can be set at runtime, most
storage arrays prefer to use the 'round-robin' iopolicy by default.
We can use udev rules to set this, but that is getting rather unwieldy
for rebranded arrays, as we would have to update the udev rules
any time a new array shows up, leading to the same mess we currently
have in multipathd for configuring the RDAC arrays.

Hence this patch adds a module parameter 'iopolicy' to allow the
admin to switch the default, and to do away with the need for a
udev rule here.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/host/core.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h

index 9666c7b..4fc794d 100644 (file)
@@ -2746,9 +2746,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
                return -EINVAL;
        }
        subsys->awupf = le16_to_cpu(id->awupf);
-#ifdef CONFIG_NVME_MULTIPATH
-       subsys->iopolicy = NVME_IOPOLICY_NUMA;
-#endif
+       nvme_mpath_default_iopolicy(subsys);
 
        subsys->dev.class = nvme_subsys_class;
        subsys->dev.release = nvme_release_subsystem;
index 7f2071f..892bd5d 100644 (file)
@@ -13,6 +13,42 @@ module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
        "turn on native support for multiple controllers per subsystem");
 
+/*
+ * Human-readable names of the multipath I/O policies, indexed by the
+ * NVME_IOPOLICY_* constants.  Both the strings and the pointer slots
+ * are const, so the table itself is read-only.
+ */
+static const char * const nvme_iopolicy_names[] = {
+       [NVME_IOPOLICY_NUMA]    = "numa",
+       [NVME_IOPOLICY_RR]      = "round-robin",
+};
+
+/*
+ * Policy copied into each subsystem by nvme_mpath_default_iopolicy();
+ * updated through the 'iopolicy' module parameter setter.
+ */
+static int iopolicy = NVME_IOPOLICY_NUMA;
+
+/*
+ * Setter for the 'iopolicy' module parameter.
+ *
+ * @val: policy name supplied by the user; must be "numa" or "round-robin".
+ * @kp:  kernel parameter descriptor (unused).
+ *
+ * Updates the default policy and returns 0, or returns -EINVAL when @val
+ * is NULL or does not name a known policy.
+ */
+static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp)
+{
+       if (!val)
+               return -EINVAL;
+
+       /*
+        * sysfs_streq() compares whole strings and tolerates a single
+        * trailing newline, so "numa\n" is accepted while input that
+        * merely starts with a policy name (e.g. "numafoo") is rejected.
+        * A plain prefix comparison would silently accept the latter.
+        */
+       if (sysfs_streq(val, "numa"))
+               iopolicy = NVME_IOPOLICY_NUMA;
+       else if (sysfs_streq(val, "round-robin"))
+               iopolicy = NVME_IOPOLICY_RR;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Getter for the 'iopolicy' module parameter: writes the name of the
+ * current default policy followed by a newline into @buf and returns
+ * the number of characters written.  @kp is unused.
+ */
+static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp)
+{
+       const char *policy_name = nvme_iopolicy_names[iopolicy];
+
+       return sprintf(buf, "%s\n", policy_name);
+}
+
+/*
+ * Expose the default policy as the 'iopolicy' module parameter (mode
+ * 0644), with nvme_set_iopolicy()/nvme_get_iopolicy() converting between
+ * policy names and NVME_IOPOLICY_* values.
+ */
+module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy,
+       &iopolicy, 0644);
+MODULE_PARM_DESC(iopolicy,
+       "Default multipath I/O policy; 'numa' (default) or 'round-robin'");
+
+/* Initialise @subsys with the current module-wide default I/O policy. */
+void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
+{
+       subsys->iopolicy = iopolicy;
+}
+
 void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
 {
        struct nvme_ns_head *h;
@@ -706,11 +742,6 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl)
        struct device_attribute subsys_attr_##_name =   \
                __ATTR(_name, _mode, _show, _store)
 
-static const char *nvme_iopolicy_names[] = {
-       [NVME_IOPOLICY_NUMA]    = "numa",
-       [NVME_IOPOLICY_RR]      = "round-robin",
-};
-
 static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
index a54096b..fe22401 100644 (file)
@@ -767,6 +767,7 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
 void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
 void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
 void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
+void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
 bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags);
 void nvme_failover_req(struct request *req);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
@@ -864,6 +865,9 @@ static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
 static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
 {
 }
+/* No-op stub for builds without CONFIG_NVME_MULTIPATH. */
+static inline void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
+{
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 int nvme_revalidate_zones(struct nvme_ns *ns);