Home Home > GIT Browse > openSUSE-15.0
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hannes Reinecke <hare@suse.de> 2016-10-25 12:58:20 +0200
committer: Hannes Reinecke <hare@suse.de> 2016-10-25 12:58:20 +0200
commit: 9464f6778611f6acbee46b9e9e277b11f53ec697 (patch)
tree: 4cd35c993412da577341340baa1cd31fdafa87d1
parent: f649d0691314b57ebed03c3bd160688cc282115b (diff)
mlx4: Do not BUG_ON() if device reset failed (bsc#1001888). rpm-4.4.21-69
-rw-r--r-- patches.suse/mlx4-Do-not-BUG_ON-if-device-reset-failed.patch 46
-rw-r--r-- series.conf 1
2 files changed, 47 insertions, 0 deletions
diff --git a/patches.suse/mlx4-Do-not-BUG_ON-if-device-reset-failed.patch b/patches.suse/mlx4-Do-not-BUG_ON-if-device-reset-failed.patch
new file mode 100644
index 0000000000..20e903a19c
--- /dev/null
+++ b/patches.suse/mlx4-Do-not-BUG_ON-if-device-reset-failed.patch
@@ -0,0 +1,46 @@
+From 023e3642122347304ec184cd74adccf85d0992f7 Mon Sep 17 00:00:00 2001
+From: Hannes Reinecke <hare@suse.de>
+Date: Tue, 25 Oct 2016 08:54:28 +0200
+Subject: mlx4: Do not BUG_ON() if device reset failed
+References: bsc#1001888
+Patch-Mainline: no, temporary fix for SLES12 SP2 GMC
+
+If the device reset fails for some reason, we should just disable
+the device rather than crash the kernel.
+
+Signed-off-by: Hannes Reinecke <hare@suse.com>
+---
+ drivers/net/ethernet/mellanox/mlx4/catas.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
+index c7e9399..b4ec016 100644
+--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
++++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
+@@ -183,17 +183,19 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
+ else
+ err = mlx4_reset_master(dev);
+
++ dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
++ mutex_unlock(&persist->device_state_mutex);
+ if (!err) {
+ mlx4_err(dev, "device was reset successfully\n");
+ } else {
+ /* EEH could have disabled the PCI channel during reset. That's
+ * recoverable and the PCI error flow will handle it.
+ */
+- if (!pci_channel_offline(dev->persist->pdev))
+- BUG_ON(1);
++ if (!pci_channel_offline(dev->persist->pdev)) {
++ mlx4_err(dev, "device reset failed with err %d\n", err);
++ return;
++ }
+ }
+- dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
+- mutex_unlock(&persist->device_state_mutex);
+
+ /* At that step HW was already reset, now notify clients */
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+--
+1.8.5.6
+
diff --git a/series.conf b/series.conf
index 637f9c368b..15a9444238 100644
--- a/series.conf
+++ b/series.conf
@@ -3027,6 +3027,7 @@
patches.drivers/net-mlx4_en-Move-filters-cleanup-to-a-proper-locatio.patch
patches.drivers/net-mlx4_en-Add-resilience-in-low-memory-systems.patch
patches.drivers/net-mlx4_core-Check-device-state-before-unregisterin.patch
+ patches.suse/mlx4-Do-not-BUG_ON-if-device-reset-failed.patch
# enic (Cisco)
patches.fixes/enic-set-netdev-vlan_features.patch