Home Home > GIT Browse > stable
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jiri Slaby <jslaby@suse.cz> 2019-02-15 10:23:48 +0100
committer Jiri Slaby <jslaby@suse.cz> 2019-02-15 10:24:00 +0100
commit 074e4928f1f45f66474496880e8b72d50f16b74a (patch)
tree f694b11c2a0e11a4140739487c8f8eef652285c3
parent f56945ec9570d6d0beb8c18928a5e9a0c431a18e (diff)
svcrdma: Remove max_sge check at connect time (bnc#1012628).
-rw-r--r-- patches.kernel.org/4.20.9-018-svcrdma-Remove-max_sge-check-at-connect-time.patch 206
-rw-r--r-- series.conf 1
2 files changed, 207 insertions, 0 deletions
diff --git a/patches.kernel.org/4.20.9-018-svcrdma-Remove-max_sge-check-at-connect-time.patch b/patches.kernel.org/4.20.9-018-svcrdma-Remove-max_sge-check-at-connect-time.patch
new file mode 100644
index 0000000000..8e4ee12287
--- /dev/null
+++ b/patches.kernel.org/4.20.9-018-svcrdma-Remove-max_sge-check-at-connect-time.patch
@@ -0,0 +1,206 @@
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Fri, 25 Jan 2019 16:54:54 -0500
+Subject: [PATCH] svcrdma: Remove max_sge check at connect time
+References: bnc#1012628
+Patch-mainline: 4.20.9
+Git-commit: e248aa7be86e8179f20ac0931774ecd746f3f5bf
+
+commit e248aa7be86e8179f20ac0931774ecd746f3f5bf upstream.
+
+Two and a half years ago, the client was changed to use gathered
+Send for larger inline messages, in commit 655fec6987b ("xprtrdma:
+Use gathered Send for large inline messages"). Several fixes were
+required because there are a few in-kernel device drivers whose
+max_sge is 3, and these were broken by the change.
+
+Apparently my memory is going, because some time later, I submitted
+commit 25fd86eca11c ("svcrdma: Don't overrun the SGE array in
+svc_rdma_send_ctxt"), and after that, commit f3c1fd0ee294 ("svcrdma:
+Reduce max_send_sges"). These too incorrectly assumed in-kernel
+device drivers would have more than a few Send SGEs available.
+
+The fix for the server side is not the same. This is because the
+fundamental problem on the server is that, whether or not the client
+has provisioned a chunk for the RPC reply, the server must squeeze
+even the most complex RPC replies into a single RDMA Send. Failing
+in the send path because of Send SGE exhaustion should never be an
+option.
+
+Therefore, instead of failing when the send path runs out of SGEs,
+switch to using a bounce buffer mechanism to handle RPC replies that
+are too complex for the device to send directly. That allows us to
+remove the max_sge check to enable drivers with small max_sge to
+work again.
+
+Reported-by: Don Dutile <ddutile@redhat.com>
+Fixes: 25fd86eca11c ("svcrdma: Don't overrun the SGE array in ...")
+Cc: stable@vger.kernel.org
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 105 +++++++++++++++++++++--
+ net/sunrpc/xprtrdma/svc_rdma_transport.c | 9 +-
+ 2 files changed, 102 insertions(+), 12 deletions(-)
+
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+index 8602a5f1b515..e8ad7ddf347a 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+@@ -563,6 +563,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
+ DMA_TO_DEVICE);
+ }
+
++/* Return true if sending @xdr would need more SGEs than
++ * rdma->sc_max_send_sges permits in a single RDMA Send; the
++ * reply then has to be copied into a bounce buffer instead.
++ */
++static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
++ struct xdr_buf *xdr,
++ __be32 *wr_lst)
++{
++ int elements;
++
++ /* xdr->head always takes one element */
++ elements = 1;
++
++ /* xdr->pages: one element per page spanned, counted only if @wr_lst is NULL */
++ if (!wr_lst) {
++ unsigned int remaining;
++ unsigned long pageoff;
++
++ pageoff = xdr->page_base & ~PAGE_MASK;
++ remaining = xdr->page_len;
++ while (remaining) {
++ ++elements;
++ remaining -= min_t(u32, PAGE_SIZE - pageoff,
++ remaining);
++ pageoff = 0;
++ }
++ }
++
++ /* xdr->tail takes one more element, but only when it is non-empty */
++ if (xdr->tail[0].iov_len)
++ ++elements;
++
++ /* ">=" rather than ">": assume 1 SGE is needed for the transport header */
++ return elements >= rdma->sc_max_send_sges;
++}
++
++/* Pull-up: copy xdr->head, the page list (skipped when @wr_lst is set;
++ * page_base's offset applies to the first page only) and the tail into
++ * sc_xprt_buf after sc_sges[0]'s current bytes, then DMA-sync that SGE.
++ */
++static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
++ struct svc_rdma_send_ctxt *ctxt,
++ struct xdr_buf *xdr, __be32 *wr_lst)
++{
++ unsigned char *dst, *tailbase;
++ unsigned int taillen;
++
++ dst = ctxt->sc_xprt_buf;
++ dst += ctxt->sc_sges[0].length;
++
++ memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
++ dst += xdr->head[0].iov_len;
++
++ tailbase = xdr->tail[0].iov_base;
++ taillen = xdr->tail[0].iov_len;
++ if (wr_lst) {
++ u32 xdrpad;
++
++ xdrpad = xdr_padsize(xdr->page_len);
++ if (taillen && xdrpad) {
++ tailbase += xdrpad;
++ taillen -= xdrpad;
++ }
++ } else {
++ unsigned int len, remaining;
++ unsigned long pageoff;
++ struct page **ppages;
++
++ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
++ pageoff = xdr->page_base & ~PAGE_MASK;
++ remaining = xdr->page_len;
++ while (remaining) {
++ len = min_t(u32, PAGE_SIZE - pageoff, remaining);
++ memcpy(dst, page_address(*ppages) + pageoff, len);
++ remaining -= len;
++ dst += len;
++ pageoff = 0;
++ ppages++;
++ }
++ }
++
++ if (taillen)
++ memcpy(dst, tailbase, taillen);
++
++ ctxt->sc_sges[0].length += xdr->len;
++ ib_dma_sync_single_for_device(rdma->sc_pd->device,
++ ctxt->sc_sges[0].addr,
++ ctxt->sc_sges[0].length,
++ DMA_TO_DEVICE);
++
++ return 0;
++}
++
+ /* svc_rdma_map_reply_msg - Map the buffer holding RPC message
+ * @rdma: controlling transport
+ * @ctxt: send_ctxt for the Send WR
+@@ -585,8 +678,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
+ u32 xdr_pad;
+ int ret;
+
+- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+- return -EIO;
++ if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
++ return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
++
++ ++ctxt->sc_cur_sge_no;
+ ret = svc_rdma_dma_map_buf(rdma, ctxt,
+ xdr->head[0].iov_base,
+ xdr->head[0].iov_len);
+@@ -617,8 +712,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - page_off, remaining);
+
+- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+- return -EIO;
++ ++ctxt->sc_cur_sge_no;
+ ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
+ page_off, len);
+ if (ret < 0)
+@@ -632,8 +726,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
+ len = xdr->tail[0].iov_len;
+ tail:
+ if (len) {
+- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+- return -EIO;
++ ++ctxt->sc_cur_sge_no;
+ ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
+ if (ret < 0)
+ return ret;
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+index 2f7ec8912f49..ce5c610b49c7 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+@@ -478,12 +478,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
+ /* Transport header, head iovec, tail iovec */
+ newxprt->sc_max_send_sges = 3;
+ /* Add one SGE per page list entry */
+- newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE;
+- if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) {
+- pr_err("svcrdma: too few Send SGEs available (%d needed)\n",
+- newxprt->sc_max_send_sges);
+- goto errout;
+- }
++ newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
++ if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
++ newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = svcrdma_max_requests;
+ newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
+--
+2.20.1
+
diff --git a/series.conf b/series.conf
index 8fff8259a9..ffa6e3f8fe 100644
--- a/series.conf
+++ b/series.conf
@@ -1100,6 +1100,7 @@
patches.kernel.org/4.20.9-015-mei-me-add-ice-lake-point-device-id.patch
patches.kernel.org/4.20.9-016-samples-mei-use-dev-mei0-instead-of-dev-mei.patch
patches.kernel.org/4.20.9-017-debugfs-fix-debugfs_rename-parameter-checking.patch
+ patches.kernel.org/4.20.9-018-svcrdma-Remove-max_sge-check-at-connect-time.patch
########################################################
# Build fixes that apply to the vanilla kernel too.