Home Home > GIT Browse > SLE12-SP4-AZURE
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOlaf Hering <ohering@suse.de>2019-04-24 09:21:01 +0200
committerOlaf Hering <ohering@suse.de>2019-04-24 09:21:01 +0200
commit8ab970a76cc1088a8a7c598b4f815b07fa8abe7e (patch)
treee3e69d80979e92d1629bb7f70713772f37872c60
parentb107b591d7dd506519959d2b52fba64c730ee81d (diff)
- Drivers: hv: vmbus: Offload the handling of channels to twoSLE12-SP4-AZURE
workqueues (bsc#1130567).
-rw-r--r--patches.suse/msft-hv-1803-Drivers-hv-vmbus-Offload-the-handling-of-channels-to.patch392
-rw-r--r--patches.suse/msft-hv-1806-Drivers-hv-vmbus-Remove-the-useless-API-vmbus_get_ou.patch8
-rw-r--r--series.conf1
3 files changed, 393 insertions, 8 deletions
diff --git a/patches.suse/msft-hv-1803-Drivers-hv-vmbus-Offload-the-handling-of-channels-to.patch b/patches.suse/msft-hv-1803-Drivers-hv-vmbus-Offload-the-handling-of-channels-to.patch
new file mode 100644
index 0000000000..6899267377
--- /dev/null
+++ b/patches.suse/msft-hv-1803-Drivers-hv-vmbus-Offload-the-handling-of-channels-to.patch
@@ -0,0 +1,392 @@
+From: Dexuan Cui <decui@microsoft.com>
+Date: Mon, 3 Dec 2018 00:54:35 +0000
+Patch-mainline: v4.20-rc6
+Subject: Drivers: hv: vmbus: Offload the handling of channels to two workqueues
+Git-commit: 37c2578c0c40e286bc0d30bdc05290b2058cf66e
+References: bsc#1130567
+
+vmbus_process_offer() mustn't call channel->sc_creation_callback()
+directly for sub-channels, because sc_creation_callback() ->
+vmbus_open() may never get the host's response to the
+OPEN_CHANNEL message (the host may rescind a channel at any time,
+e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
+may not wake up the vmbus_open() as it's blocked due to a non-zero
+vmbus_connection.offer_in_progress, and finally we have a deadlock.
+
+The above is also true for primary channels, if the related device
+drivers use sync probing mode by default.
+
+And, usually the handling of primary channels and sub-channels can
+depend on each other, so we should offload them to different
+workqueues to avoid possible deadlock, e.g. in sync-probing mode,
+NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
+rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
+and waits for all the sub-channels to appear, but the latter
+can't get the rtnl_lock and this blocks the handling of sub-channels.
+
+The patch can fix the multiple-NIC deadlock described above for
+v3.x kernels (e.g. RHEL 7.x) which don't support async-probing
+of devices, and v4.4, v4.9, v4.14 and v4.18 which support async-probing
+but don't enable async-probing for Hyper-V drivers (yet).
+
+The patch can also fix the hang issue in sub-channel's handling described
+above for all versions of kernels, including v4.19 and v4.20-rc4.
+
+So actually the patch should be applied to all the existing kernels,
+not only the kernels that have 8195b1396ec8.
+
+Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
+Cc: stable@vger.kernel.org
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
+Cc: K. Y. Srinivasan <kys@microsoft.com>
+Cc: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Acked-by: Olaf Hering <ohering@suse.de>
+---
+ drivers/hv/channel_mgmt.c | 189 ++++++++++++++++++++++++++++++----------------
+ drivers/hv/connection.c | 24 +++++-
+ drivers/hv/hyperv_vmbus.h | 7 ++
+ include/linux/hyperv.h | 7 ++
+ 4 files changed, 161 insertions(+), 66 deletions(-)
+
+diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
+--- a/drivers/hv/channel_mgmt.c
++++ b/drivers/hv/channel_mgmt.c
+@@ -435,61 +435,16 @@ void vmbus_free_channels(void)
+ }
+ }
+
+-/*
+- * vmbus_process_offer - Process the offer by creating a channel/device
+- * associated with this offer
+- */
+-static void vmbus_process_offer(struct vmbus_channel *newchannel)
++/* Note: the function can run concurrently for primary/sub channels. */
++static void vmbus_add_channel_work(struct work_struct *work)
+ {
+- struct vmbus_channel *channel;
+- bool fnew = true;
++ struct vmbus_channel *newchannel =
++ container_of(work, struct vmbus_channel, add_channel_work);
++ struct vmbus_channel *primary_channel = newchannel->primary_channel;
+ unsigned long flags;
+ u16 dev_type;
+ int ret;
+
+- /* Make sure this is a new offer */
+- mutex_lock(&vmbus_connection.channel_mutex);
+-
+- /*
+- * Now that we have acquired the channel_mutex,
+- * we can release the potentially racing rescind thread.
+- */
+- atomic_dec(&vmbus_connection.offer_in_progress);
+-
+- list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+- if (!uuid_le_cmp(channel->offermsg.offer.if_type,
+- newchannel->offermsg.offer.if_type) &&
+- !uuid_le_cmp(channel->offermsg.offer.if_instance,
+- newchannel->offermsg.offer.if_instance)) {
+- fnew = false;
+- break;
+- }
+- }
+-
+- if (fnew)
+- list_add_tail(&newchannel->listentry,
+- &vmbus_connection.chn_list);
+-
+- mutex_unlock(&vmbus_connection.channel_mutex);
+-
+- if (!fnew) {
+- /*
+- * Check to see if this is a sub-channel.
+- */
+- if (newchannel->offermsg.offer.sub_channel_index != 0) {
+- /*
+- * Process the sub-channel.
+- */
+- newchannel->primary_channel = channel;
+- spin_lock_irqsave(&channel->lock, flags);
+- list_add_tail(&newchannel->sc_list, &channel->sc_list);
+- channel->num_sc++;
+- spin_unlock_irqrestore(&channel->lock, flags);
+- } else {
+- goto err_free_chan;
+- }
+- }
+-
+ dev_type = hv_get_dev_type(newchannel);
+
+ init_vp_index(newchannel, dev_type);
+@@ -507,27 +462,26 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
+ /*
+ * This state is used to indicate a successful open
+ * so that when we do close the channel normally, we
+- * can cleanup properly
++ * can cleanup properly.
+ */
+ newchannel->state = CHANNEL_OPEN_STATE;
+
+- if (!fnew) {
+- struct hv_device *dev
+- = newchannel->primary_channel->device_obj;
++ if (primary_channel != NULL) {
++ /* newchannel is a sub-channel. */
++ struct hv_device *dev = primary_channel->device_obj;
+
+ if (vmbus_add_channel_kobj(dev, newchannel))
+- goto err_free_chan;
++ goto err_deq_chan;
++
++ if (primary_channel->sc_creation_callback != NULL)
++ primary_channel->sc_creation_callback(newchannel);
+
+- if (channel->sc_creation_callback != NULL)
+- channel->sc_creation_callback(newchannel);
+ newchannel->probe_done = true;
+ return;
+ }
+
+ /*
+- * Start the process of binding this offer to the driver
+- * We need to set the DeviceObject field before calling
+- * vmbus_child_dev_add()
++ * Start the process of binding the primary channel to the driver
+ */
+ newchannel->device_obj = vmbus_device_create(
+ &newchannel->offermsg.offer.if_type,
+@@ -556,13 +510,28 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
+
+ err_deq_chan:
+ mutex_lock(&vmbus_connection.channel_mutex);
+- list_del(&newchannel->listentry);
++
++ /*
++ * We need to set the flag, otherwise
++ * vmbus_onoffer_rescind() can be blocked.
++ */
++ newchannel->probe_done = true;
++
++ if (primary_channel == NULL) {
++ list_del(&newchannel->listentry);
++ } else {
++ spin_lock_irqsave(&primary_channel->lock, flags);
++ list_del(&newchannel->sc_list);
++ spin_unlock_irqrestore(&primary_channel->lock, flags);
++ }
++
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ if (newchannel->target_cpu != get_cpu()) {
+ put_cpu();
+ smp_call_function_single(newchannel->target_cpu,
+- percpu_channel_deq, newchannel, true);
++ percpu_channel_deq,
++ newchannel, true);
+ } else {
+ percpu_channel_deq(newchannel);
+ put_cpu();
+@@ -570,14 +539,104 @@ err_deq_chan:
+
+ vmbus_release_relid(newchannel->offermsg.child_relid);
+
+-err_free_chan:
+ free_channel(newchannel);
+ }
+
++/*
++ * vmbus_process_offer - Process the offer by creating a channel/device
++ * associated with this offer
++ */
++static void vmbus_process_offer(struct vmbus_channel *newchannel)
++{
++ struct vmbus_channel *channel;
++ struct workqueue_struct *wq;
++ unsigned long flags;
++ bool fnew = true;
++
++ mutex_lock(&vmbus_connection.channel_mutex);
++
++ /*
++ * Now that we have acquired the channel_mutex,
++ * we can release the potentially racing rescind thread.
++ */
++ atomic_dec(&vmbus_connection.offer_in_progress);
++
++ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
++ if (!uuid_le_cmp(channel->offermsg.offer.if_type,
++ newchannel->offermsg.offer.if_type) &&
++ !uuid_le_cmp(channel->offermsg.offer.if_instance,
++ newchannel->offermsg.offer.if_instance)) {
++ fnew = false;
++ break;
++ }
++ }
++
++ if (fnew)
++ list_add_tail(&newchannel->listentry,
++ &vmbus_connection.chn_list);
++ else {
++ /*
++ * Check to see if this is a valid sub-channel.
++ */
++ if (newchannel->offermsg.offer.sub_channel_index == 0) {
++ mutex_unlock(&vmbus_connection.channel_mutex);
++ /*
++ * Don't call free_channel(), because newchannel->kobj
++ * is not initialized yet.
++ */
++ kfree(newchannel);
++ WARN_ON_ONCE(1);
++ return;
++ }
++ /*
++ * Process the sub-channel.
++ */
++ newchannel->primary_channel = channel;
++ spin_lock_irqsave(&channel->lock, flags);
++ list_add_tail(&newchannel->sc_list, &channel->sc_list);
++ spin_unlock_irqrestore(&channel->lock, flags);
++ }
++
++ mutex_unlock(&vmbus_connection.channel_mutex);
++
++ /*
++ * vmbus_process_offer() mustn't call channel->sc_creation_callback()
++ * directly for sub-channels, because sc_creation_callback() ->
++ * vmbus_open() may never get the host's response to the
++ * OPEN_CHANNEL message (the host may rescind a channel at any time,
++ * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
++ * may not wake up the vmbus_open() as it's blocked due to a non-zero
++ * vmbus_connection.offer_in_progress, and finally we have a deadlock.
++ *
++ * The above is also true for primary channels, if the related device
++ * drivers use sync probing mode by default.
++ *
++ * And, usually the handling of primary channels and sub-channels can
++ * depend on each other, so we should offload them to different
++ * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
++ * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
++ * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
++ * and waits for all the sub-channels to appear, but the latter
++ * can't get the rtnl_lock and this blocks the handling of
++ * sub-channels.
++ */
++ INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
++ wq = fnew ? vmbus_connection.handle_primary_chan_wq :
++ vmbus_connection.handle_sub_chan_wq;
++ queue_work(wq, &newchannel->add_channel_work);
++}
++
+ /*
+ * We use this state to statically distribute the channel interrupt load.
+ */
+ static int next_numa_node_id;
++/*
++ * init_vp_index() accesses global variables like next_numa_node_id, and
++ * it can run concurrently for primary channels and sub-channels: see
++ * vmbus_process_offer(), so we need the lock to protect the global
++ * variables.
++ */
++static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
+
+ /*
+ * Starting with Win8, we can statically distribute the incoming
+@@ -613,6 +672,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
+ return;
+ }
+
++ spin_lock(&bind_channel_to_cpu_lock);
++
+ /*
+ * Based on the channel affinity policy, we will assign the NUMA
+ * nodes.
+@@ -695,6 +756,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
+
+ channel->target_cpu = cur_cpu;
+ channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
++
++ spin_unlock(&bind_channel_to_cpu_lock);
+ }
+
+ static void vmbus_wait_for_unload(void)
+diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
+--- a/drivers/hv/connection.c
++++ b/drivers/hv/connection.c
+@@ -190,6 +190,20 @@ int vmbus_connect(void)
+ goto cleanup;
+ }
+
++ vmbus_connection.handle_primary_chan_wq =
++ create_workqueue("hv_pri_chan");
++ if (!vmbus_connection.handle_primary_chan_wq) {
++ ret = -ENOMEM;
++ goto cleanup;
++ }
++
++ vmbus_connection.handle_sub_chan_wq =
++ create_workqueue("hv_sub_chan");
++ if (!vmbus_connection.handle_sub_chan_wq) {
++ ret = -ENOMEM;
++ goto cleanup;
++ }
++
+ INIT_LIST_HEAD(&vmbus_connection.chn_msg_list);
+ spin_lock_init(&vmbus_connection.channelmsg_lock);
+
+@@ -280,10 +294,14 @@ void vmbus_disconnect(void)
+ */
+ vmbus_initiate_unload(false);
+
+- if (vmbus_connection.work_queue) {
+- drain_workqueue(vmbus_connection.work_queue);
++ if (vmbus_connection.handle_sub_chan_wq)
++ destroy_workqueue(vmbus_connection.handle_sub_chan_wq);
++
++ if (vmbus_connection.handle_primary_chan_wq)
++ destroy_workqueue(vmbus_connection.handle_primary_chan_wq);
++
++ if (vmbus_connection.work_queue)
+ destroy_workqueue(vmbus_connection.work_queue);
+- }
+
+ if (vmbus_connection.int_page) {
+ free_pages((unsigned long)vmbus_connection.int_page, 0);
+diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
+--- a/drivers/hv/hyperv_vmbus.h
++++ b/drivers/hv/hyperv_vmbus.h
+@@ -335,7 +335,14 @@ struct vmbus_connection {
+ struct list_head chn_list;
+ struct mutex channel_mutex;
+
++ /*
++ * An offer message is handled first on the work_queue, and then
++ * is further handled on handle_primary_chan_wq or
++ * handle_sub_chan_wq.
++ */
+ struct workqueue_struct *work_queue;
++ struct workqueue_struct *handle_primary_chan_wq;
++ struct workqueue_struct *handle_sub_chan_wq;
+ };
+
+
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -905,6 +905,13 @@ struct vmbus_channel {
+
+ bool probe_done;
+
++ /*
++ * We must offload the handling of the primary/sub channels
++ * from the single-threaded vmbus_connection.work_queue to
++ * two different workqueue, otherwise we can block
++ * vmbus_connection.work_queue and hang: see vmbus_process_offer().
++ */
++ struct work_struct add_channel_work;
+ };
+
+ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
diff --git a/patches.suse/msft-hv-1806-Drivers-hv-vmbus-Remove-the-useless-API-vmbus_get_ou.patch b/patches.suse/msft-hv-1806-Drivers-hv-vmbus-Remove-the-useless-API-vmbus_get_ou.patch
index d0b36dc9d8..a7c6594c95 100644
--- a/patches.suse/msft-hv-1806-Drivers-hv-vmbus-Remove-the-useless-API-vmbus_get_ou.patch
+++ b/patches.suse/msft-hv-1806-Drivers-hv-vmbus-Remove-the-useless-API-vmbus_get_ou.patch
@@ -52,14 +52,6 @@ diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
spin_unlock_irqrestore(&primary_channel->lock, flags);
}
-@@ -483,7 +482,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
- newchannel->primary_channel = channel;
- spin_lock_irqsave(&channel->lock, flags);
- list_add_tail(&newchannel->sc_list, &channel->sc_list);
-- channel->num_sc++;
- spin_unlock_irqrestore(&channel->lock, flags);
- } else {
- goto err_free_chan;
@@ -1239,49 +1237,6 @@ cleanup:
return ret;
}
diff --git a/series.conf b/series.conf
index 90e3e57cb0..d2123d5541 100644
--- a/series.conf
+++ b/series.conf
@@ -20363,6 +20363,7 @@
patches.fixes/kgdboc-fix-KASAN-global-out-of-bounds-bug-in-param_s.patch
patches.drivers/staging-rtl8712-Fix-possible-buffer-overrun.patch
patches.drivers/Revert-commit-ef9209b642f-staging-rtl8723bs-Fix-inde.patch
+ patches.suse/msft-hv-1803-Drivers-hv-vmbus-Offload-the-handling-of-channels-to.patch
patches.fixes/x86-efi-Allocate-e820-buffer-before-calling-efi_exit.patch
patches.arch/x86-build-Fix-compiler-support-check-for-CONFIG_RETP.patch
patches.suse/virtio-net-keep-vnet-header-zeroed-after-processing-.patch