Home Home > GIT Browse > vanilla
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetr Tesarik <ptesarik@suse.cz>2019-01-25 14:25:37 +0100
committerPetr Tesarik <ptesarik@suse.cz>2019-01-25 14:25:37 +0100
commitfff90ccd897abba19392b8e31484538a96131d5a (patch)
tree5906b01e257ba4c3b6785a4b83ccfb6311f5b773
parentf02eaea17f5b649dbbf6cacc08953d3cb5e89ced (diff)
parent732ec99144a650fe8fb5c985343462c7020f17bf (diff)
Merge branch 'users/hare/SLE15-SP1/for-next' into SLE15-SP1
Pull NVMe load balancing from Hannes Reinecke. Conflicts: series.conf
-rw-r--r--patches.suse/nvme-multipath-round-robin-I-O-policy.patch220
-rw-r--r--series.conf1
2 files changed, 221 insertions, 0 deletions
diff --git a/patches.suse/nvme-multipath-round-robin-I-O-policy.patch b/patches.suse/nvme-multipath-round-robin-I-O-policy.patch
new file mode 100644
index 0000000000..8d32b1621f
--- /dev/null
+++ b/patches.suse/nvme-multipath-round-robin-I-O-policy.patch
@@ -0,0 +1,220 @@
+From: Hannes Reinecke <hare@suse.de>
+Date: Thu, 15 Nov 2018 12:31:17 +0100
+Subject: [PATCH] nvme-multipath: round-robin I/O policy
+References: bsc#1110705
+Patch-Mainline: submitted linux-nvme 2019/01/25
+
+Implement a simple round-robin I/O policy for multipathing.
+Path selection is done in two rounds: first iterating across all
+optimized paths and, if that doesn't return any valid path,
+iterating over all optimized and non-optimized paths.
+If no path is found, we fall back to the existing algorithm.
+This patch also implements a sysfs attribute 'iopolicy' to switch
+between the current, NUMA-aware I/O policy and the 'round-robin'
+I/O policy.
+
+Signed-off-by: Hannes Reinecke <hare@suse.com>
+---
+ drivers/nvme/host/core.c | 6 +++
+ drivers/nvme/host/multipath.c | 102 +++++++++++++++++++++++++++++++++++++++++-
+ drivers/nvme/host/nvme.h | 12 +++++
+ 3 files changed, 119 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 6b2617e213cc..7f595086f6c6 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -2232,6 +2232,9 @@ static struct attribute *nvme_subsys_attrs[] = {
+ &subsys_attr_serial.attr,
+ &subsys_attr_firmware_rev.attr,
+ &subsys_attr_subsysnqn.attr,
++#ifdef CONFIG_NVME_MULTIPATH
++ &subsys_attr_iopolicy.attr,
++#endif
+ NULL,
+ };
+
+@@ -2284,6 +2287,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+ memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
+ subsys->vendor_id = le16_to_cpu(id->vid);
+ subsys->cmic = id->cmic;
++#ifdef CONFIG_NVME_MULTIPATH
++ subsys->iopolicy = NVME_IOPOLICY_NUMA;
++#endif
+
+ subsys->dev.class = nvme_subsys_class;
+ subsys->dev.release = nvme_release_subsystem;
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index 8b58a6ca5bf4..6122ff5aa438 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -141,7 +141,10 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
+ test_bit(NVME_NS_ANA_PENDING, &ns->flags))
+ continue;
+
+- distance = node_distance(node, ns->ctrl->numa_node);
++ if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
++ distance = node_distance(node, ns->ctrl->numa_node);
++ else
++ distance = LOCAL_DISTANCE;
+
+ switch (ns->ana_state) {
+ case NVME_ANA_OPTIMIZED:
+@@ -168,6 +171,56 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
+ return found;
+ }
+
++static struct nvme_ns *__nvme_rr_next_path(struct nvme_ns_head *head, int node,
++ struct nvme_ns *old)
++{ /* Round-robin: select the path that follows 'old' in head->list. */
++ struct nvme_ns *ns, *found = NULL;
++ bool try_nonoptimized = false; /* set for the second pass only */
++
++ if (!old)
++ return NULL; /* no starting path to advance from */
++retry: /* pass 1 accepts optimized paths only; pass 2 also non-optimized */
++ ns = old; /* each pass starts over from 'old' */
++ do {
++ ns = list_next_or_null_rcu(&head->list, &ns->siblings,
++ struct nvme_ns, siblings);
++ if (!ns) { /* ran off the end of the list: wrap to the first entry */
++ ns = list_first_or_null_rcu(&head->list, struct nvme_ns,
++ siblings);
++ if (!ns)
++ return NULL;
++
++ if (ns == old)
++ /*
++ * Wrapped to the list head, which is 'old': no other path
++ * was picked; return 'old' without re-checking its state.
++ */
++ return old;
++ }
++ if (!ns->disk || ns->ctrl->state != NVME_CTRL_LIVE ||
++ test_bit(NVME_NS_ANA_PENDING, &ns->flags))
++ continue; /* skip: no disk, controller not live, or ANA pending */
++
++ if (ns->ana_state == NVME_ANA_OPTIMIZED) {
++ found = ns;
++ break;
++ }
++ if (try_nonoptimized &&
++ ns->ana_state == NVME_ANA_NONOPTIMIZED) {
++ found = ns;
++ break;
++ }
++ } while (ns != old);
++
++ if (found)
++ rcu_assign_pointer(head->current_path[node], found); /* remember the pick for this node */
++ else if (!try_nonoptimized) {
++ try_nonoptimized = true;
++ goto retry; /* second pass */
++ }
++ return found;
++}
++
+ static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
+ {
+ return ns->ctrl->state == NVME_CTRL_LIVE &&
+@@ -180,6 +233,8 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
+ struct nvme_ns *ns;
+
+ ns = srcu_dereference(head->current_path[node], &head->srcu);
++ if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR)
++ ns = __nvme_rr_next_path(head, node, ns);
+ if (unlikely(!ns || !nvme_path_is_optimized(ns)))
+ ns = __nvme_find_path(head, node);
+ return ns;
+@@ -487,6 +542,51 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl)
+ cancel_work_sync(&ctrl->ana_work);
+ }
+
++#define SUBSYS_ATTR_RW(_name, _mode, _show, _store) \
++ struct device_attribute subsys_attr_##_name = \
++ __ATTR(_name, _mode, _show, _store) /* not static: subsys_attr_iopolicy is declared extern in nvme.h */
++
++static const char * const nvme_iopolicy_names[] = {
++	[NVME_IOPOLICY_UNKNOWN]	= "unknown",
++	[NVME_IOPOLICY_NUMA]	= "numa",
++	[NVME_IOPOLICY_RR]	= "round-robin",
++}; /* read-only sysfs names, indexed by enum nvme_iopolicy */
++
++static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
++		struct device_attribute *attr, char *buf)
++{ /* sysfs 'iopolicy' read: print the current policy name and a newline */
++	struct nvme_subsystem *subsys =
++		container_of(dev, struct nvme_subsystem, dev);
++	unsigned int iopolicy = READ_ONCE(subsys->iopolicy);
++
++	if (iopolicy >= ARRAY_SIZE(nvme_iopolicy_names)) /* validate the value read */
++		iopolicy = NVME_IOPOLICY_UNKNOWN;
++	return sprintf(buf, "%s\n", nvme_iopolicy_names[iopolicy]);
++}
++
++static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
++		struct device_attribute *attr, const char *buf, size_t count)
++{ /* sysfs 'iopolicy' write: select the policy named in buf */
++	enum nvme_iopolicy iopolicy = NVME_IOPOLICY_UNKNOWN;
++	struct nvme_subsystem *subsys =
++		container_of(dev, struct nvme_subsystem, dev);
++
++	/* Whole-string match (a trailing newline is fine); no prefix matches. */
++	if (sysfs_streq(buf, nvme_iopolicy_names[NVME_IOPOLICY_NUMA]))
++		iopolicy = NVME_IOPOLICY_NUMA;
++	else if (sysfs_streq(buf, nvme_iopolicy_names[NVME_IOPOLICY_RR]))
++		iopolicy = NVME_IOPOLICY_RR;
++
++	/* Anything else is rejected and the current policy stays in effect. */
++	if (iopolicy == NVME_IOPOLICY_UNKNOWN)
++		return -EINVAL;
++
++	WRITE_ONCE(subsys->iopolicy, iopolicy);
++	return count;
++}
++SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
++ nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
++
+ static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index b84be08bbbe0..7967837b6a8c 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -243,6 +243,14 @@ struct nvme_ctrl {
+ struct nvmf_ctrl_options *opts;
+ };
+
++#ifdef CONFIG_NVME_MULTIPATH
++enum nvme_iopolicy { /* per-subsystem multipath path-selection policy */
++ NVME_IOPOLICY_UNKNOWN, /* not selectable: the sysfs store handler returns -EINVAL */
++ NVME_IOPOLICY_NUMA, /* default, set in nvme_init_subsystem(); uses node_distance() */
++ NVME_IOPOLICY_RR, /* "round-robin": advance to the next path on each lookup */
++};
++#endif
++
+ struct nvme_subsystem {
+ int instance;
+ struct device dev;
+@@ -262,6 +270,9 @@ struct nvme_subsystem {
+ u8 cmic;
+ u16 vendor_id;
+ struct ida ns_ida;
++#ifdef CONFIG_NVME_MULTIPATH
++ enum nvme_iopolicy iopolicy;
++#endif
+ };
+
+ /*
+@@ -482,6 +493,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+
+ extern struct device_attribute dev_attr_ana_grpid;
+ extern struct device_attribute dev_attr_ana_state;
++extern struct device_attribute subsys_attr_iopolicy;
+
+ #else
+ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+--
+2.16.4
+
diff --git a/series.conf b/series.conf
index 3caef10dd1..5667ea3fcb 100644
--- a/series.conf
+++ b/series.conf
@@ -42718,6 +42718,7 @@
patches.fixes/nvme-multipath-zero-out-ANA-log-buffer.patch
patches.arch/powerpc-tm-Avoid-machine-crash-on-rt_sigreturn.patch
patches.drivers/net-mvpp2-fix-condition-for-setting-up-link-interrup.patch
+ patches.suse/nvme-multipath-round-robin-I-O-policy.patch
########################################################
# end of sorted patches