Home Home > GIT Browse > SLE15-SP1
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luis Henriques <lhenriques@suse.com> 2019-01-16 16:12:01 +0000
committer Luis Henriques <lhenriques@suse.com> 2019-01-18 11:21:09 +0000
commit e032ffe5104b98931d86da0f8167b2942d7bf455 (patch)
tree cda8fae9ee60f741593f681a73b8f790f0e3f8bd
parent aab550edd4b32288ed3ea2bae60bbb32fb0e37d1 (diff)
libceph: use MSG_SENDPAGE_NOTLAST with ceph_tcp_sendpage()
(bsc#1122215).
-rw-r--r-- patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch 104
-rw-r--r-- series.conf 1
2 files changed, 105 insertions, 0 deletions
diff --git a/patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch b/patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch
new file mode 100644
index 0000000000..bf54fc5551
--- /dev/null
+++ b/patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch
@@ -0,0 +1,104 @@
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 20 Nov 2018 15:44:00 +0100
+Subject: libceph: use MSG_SENDPAGE_NOTLAST with ceph_tcp_sendpage()
+Git-commit: 433b0a12953bc1dfcb52febb186136395a65aad0
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+Prevent do_tcp_sendpages() from calling tcp_push() (at least) once per
+page. Instead, arrange for tcp_push() to be called (at least) once per
+data payload. This results in more MSS-sized packets and fewer packets
+overall (5-10% reduction in my tests with typical OSD request sizes).
+See commits 2f5338442425 ("tcp: allow splice() to build full TSO
+packets"), 35f9c09fe9c7 ("tcp: tcp_sendpages() should call tcp_push()
+once") and ae62ca7b0321 ("tcp: fix MSG_SENDPAGE_NOTLAST logic") for
+details.
+
+Here is an example of a packet size histogram for 128K OSD requests
+(MSS = 1448, top 5):
+
+Before:
+
+ SIZE COUNT
+ 1448 777700
+ 952 127915
+ 1200 39238
+ 1219 9806
+ 21 5675
+
+After:
+
+ SIZE COUNT
+ 1448 897280
+ 21 6201
+ 1019 2797
+ 643 2739
+ 376 2479
+
+We could do slightly better by explicitly corking the socket but it's
+not clear it's worth it.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ net/ceph/messenger.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -572,12 +572,15 @@ static int ceph_tcp_sendmsg(struct socke
+ return r;
+ }
+
++/*
++ * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
++ */
+ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+- int offset, size_t size, bool more)
++ int offset, size_t size, int more)
+ {
+ ssize_t (*sendpage)(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags);
+- int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : 0);
++ int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
+ int ret;
+
+ /*
+@@ -1571,6 +1574,7 @@ static int write_partial_message_data(st
+ struct ceph_msg *msg = con->out_msg;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
++ int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ u32 crc;
+
+ dout("%s %p msg %p\n", __func__, con, msg);
+@@ -1599,8 +1603,10 @@ static int write_partial_message_data(st
+ }
+
+ page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
++ if (length == cursor->total_resid)
++ more = MSG_MORE;
+ ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
+- true);
++ more);
+ if (ret <= 0) {
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
+@@ -1630,13 +1636,16 @@ static int write_partial_message_data(st
+ */
+ static int write_partial_skip(struct ceph_connection *con)
+ {
++ int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ int ret;
+
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
+ while (con->out_skip > 0) {
+ size_t size = min(con->out_skip, (int) PAGE_SIZE);
+
+- ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
++ if (size == con->out_skip)
++ more = MSG_MORE;
++ ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
+ if (ret <= 0)
+ goto out;
+ con->out_skip -= ret;
diff --git a/series.conf b/series.conf
index 77ada213d2..d18c2469f1 100644
--- a/series.conf
+++ b/series.conf
@@ -42496,6 +42496,7 @@
patches.suse/ceph-remove-redundant-assignment.patch
patches.suse/libceph-drop-last_piece-logic-from-write_partial_message_data.patch
patches.suse/libceph-use-sock_no_sendpage-as-a-fallback-in-ceph_tcp_sendpage.patch
+ patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch
patches.fixes/ceph-don-t-update-importing-cap-s-mseq-when-handing-cap-export.patch
patches.fixes/xfs-xfs_buf-drop-useless-LIST_HEAD.patch
patches.drivers/thermal-bcm2835-enable-hwmon-explicitly.patch