Home Home > GIT Browse > openSUSE-15.1
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas Bogendoerfer <tbogendoerfer@suse.de> 2019-06-07 13:27:44 +0200
committer: Thomas Bogendoerfer <tbogendoerfer@suse.de> 2019-06-07 13:29:00 +0200
commit: 727f17d530f66d01cfeab1fcc0f6b03173b8317f (patch)
tree: 83636f98ae566eb091a2cd360ecaa2936f081836
parent: abad277c79e7488b513f7358b8f91e5d2cfaccf2 (diff)
RDMA/hns: Fix the chip hanging caused by sending doorbell
during reset (bsc#1104427 FATE#326416 bsc#1137232).
-rw-r--r-- patches.drivers/RDMA-hns-Fix-the-chip-hanging-caused-by-sending-door.patch 168
-rw-r--r-- series.conf 1
2 files changed, 169 insertions, 0 deletions
diff --git a/patches.drivers/RDMA-hns-Fix-the-chip-hanging-caused-by-sending-door.patch b/patches.drivers/RDMA-hns-Fix-the-chip-hanging-caused-by-sending-door.patch
new file mode 100644
index 0000000000..b0f860e779
--- /dev/null
+++ b/patches.drivers/RDMA-hns-Fix-the-chip-hanging-caused-by-sending-door.patch
@@ -0,0 +1,168 @@
+From: "Wei Hu (Xavier)" <xavier.huwei@huawei.com>
+Date: Sun, 3 Feb 2019 20:43:15 +0800
+Subject: RDMA/hns: Fix the chip hanging caused by sending doorbell during
+ reset
+Patch-mainline: v5.1-rc1
+Git-commit: d3743fa94ccd177917783726faf54632439ddb54
+References: bsc#1104427 FATE#326416 bsc#1137232
+
+On the hi08 chip, there is a possibility of the chip hanging when a doorbell
+is sent during reset. We can fix it by prohibiting doorbells during reset.
+
+Fixes: 2d40788825ac ("RDMA/hns: Add support for processing send wr and receive wr")
+Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
+Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
+Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
+---
+ drivers/infiniband/hw/hns/hns_roce_device.h | 1 +
+ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 25 ++++++++++++++++---------
+ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 11 +++++++++++
+ 3 files changed, 28 insertions(+), 9 deletions(-)
+
+--- a/drivers/infiniband/hw/hns/hns_roce_device.h
++++ b/drivers/infiniband/hw/hns/hns_roce_device.h
+@@ -946,6 +946,7 @@ struct hns_roce_dev {
+ spinlock_t bt_cmd_lock;
+ bool active;
+ bool is_reset;
++ bool dis_db;
+ unsigned long reset_cnt;
+ struct hns_roce_ib_iboe iboe;
+
+--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+@@ -587,7 +587,7 @@ out:
+ roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
+ V2_DB_PARAMETER_SL_S, qp->sl);
+
+- hns_roce_write64_k((__le32 *)&sq_db, qp->sq.db_reg_l);
++ hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
+
+ qp->sq_next_wqe = ind;
+ qp->next_sge = sge_ind;
+@@ -717,7 +717,7 @@ static int hns_roce_v2_cmd_hw_reseted(st
+ unsigned long reset_stage)
+ {
+ /* When hardware reset has been completed once or more, we should stop
+- * sending mailbox&cmq to hardware. If now in .init_instance()
++ * sending mailbox&cmq&doorbell to hardware. If now in .init_instance()
+ * function, we should exit with error. If now at HNAE3_INIT_CLIENT
+ * stage of soft reset process, we should exit with error, and then
+ * HNAE3_INIT_CLIENT related process can rollback the operation like
+@@ -726,6 +726,7 @@ static int hns_roce_v2_cmd_hw_reseted(st
+ * reset process once again.
+ */
+ hr_dev->is_reset = true;
++ hr_dev->dis_db = true;
+
+ if (reset_stage == HNS_ROCE_STATE_RST_INIT ||
+ instance_stage == HNS_ROCE_STATE_INIT)
+@@ -742,8 +743,8 @@ static int hns_roce_v2_cmd_hw_resetting(
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+- /* When hardware reset is detected, we should stop sending mailbox&cmq
+- * to hardware. If now in .init_instance() function, we should
++ /* When hardware reset is detected, we should stop sending mailbox&cmq&
++ * doorbell to hardware. If now in .init_instance() function, we should
+ * exit with error. If now at HNAE3_INIT_CLIENT stage of soft reset
+ * process, we should exit with error, and then HNAE3_INIT_CLIENT
+ * related process can rollback the operation like notifing hardware to
+@@ -751,6 +752,7 @@ static int hns_roce_v2_cmd_hw_resetting(
+ * error to notify NIC driver to reschedule soft reset process once
+ * again.
+ */
++ hr_dev->dis_db = true;
+ if (!ops->get_hw_reset_stat(handle))
+ hr_dev->is_reset = true;
+
+@@ -768,9 +770,10 @@ static int hns_roce_v2_cmd_sw_resetting(
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ /* When software reset is detected at .init_instance() function, we
+- * should stop sending mailbox&cmq to hardware, and exit with
+- * error.
++ * should stop sending mailbox&cmq&doorbell to hardware, and exit
++ * with error.
+ */
++ hr_dev->dis_db = true;
+ if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt)
+ hr_dev->is_reset = true;
+
+@@ -2495,6 +2498,7 @@ static void hns_roce_v2_write_cqc(struct
+ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags flags)
+ {
++ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ u32 notification_flag;
+ u32 doorbell[2];
+@@ -2520,7 +2524,7 @@ static int hns_roce_v2_req_notify_cq(str
+ roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S,
+ notification_flag);
+
+- hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
++ hns_roce_write64(hr_dev, doorbell, hr_cq->cq_db_l);
+
+ return 0;
+ }
+@@ -4763,6 +4767,7 @@ static void hns_roce_v2_init_irq_work(st
+
+ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
+ {
++ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ u32 doorbell[2];
+
+ doorbell[0] = 0;
+@@ -4789,7 +4794,7 @@ static void set_eq_cons_index_v2(struct
+ HNS_ROCE_V2_EQ_DB_PARA_S,
+ (eq->cons_index & HNS_ROCE_V2_CONS_IDX_M));
+
+- hns_roce_write64_k(doorbell, eq->doorbell);
++ hns_roce_write64(hr_dev, doorbell, eq->doorbell);
+ }
+
+ static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
+@@ -6011,6 +6016,7 @@ static int hns_roce_v2_post_srq_recv(str
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+ {
++ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct hns_roce_v2_db srq_db;
+@@ -6072,7 +6078,7 @@ static int hns_roce_v2_post_srq_recv(str
+ srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
+ srq_db.parameter = srq->head;
+
+- hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l);
++ hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
+
+ }
+
+@@ -6291,6 +6297,7 @@ static int hns_roce_hw_v2_reset_notify_d
+ return 0;
+
+ hr_dev->active = false;
++ hr_dev->dis_db = true;
+
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.device = &hr_dev->ib_dev;
+--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+@@ -1799,4 +1799,15 @@ struct hns_roce_sccc_clr_done {
+ __le32 rsv[5];
+ };
+
++static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
++ void __iomem *dest)
++{
++ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
++ struct hnae3_handle *handle = priv->handle;
++ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
++
++ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
++ hns_roce_write64_k(val, dest);
++}
++
+ #endif
diff --git a/series.conf b/series.conf
index 4ae9b262e8..2c57d72d66 100644
--- a/series.conf
+++ b/series.conf
@@ -45482,6 +45482,7 @@
patches.drivers/RDMA-hns-Make-some-function-static.patch
patches.drivers/RDMA-hns-Fix-the-Oops-during-rmmod-or-insmod-ko-when.patch
patches.drivers/RDMA-hns-Fix-the-chip-hanging-caused-by-sending-mail.patch
+ patches.drivers/RDMA-hns-Fix-the-chip-hanging-caused-by-sending-door.patch
patches.drivers/iw_cxgb-kzalloc-the-iwcm-verbs-struct.patch
patches.drivers/IB-mlx5-Do-not-use-hw_access_flags-for-be-and-CPU-da.patch
patches.drivers/RDMA-bnxt_re-Add-chip-context-to-identify-57500-seri.patch