Home Home > GIT Browse > SLE15-SP1
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetr Tesarik <ptesarik@suse.cz>2019-01-18 17:10:49 +0100
committerPetr Tesarik <ptesarik@suse.cz>2019-01-18 17:10:49 +0100
commit22e511140a3cd13f2e038fba372173487fc59bd3 (patch)
tree48d5a9ed122671e49ab9f5683e32055593783f07
parent7a46ca97b936adba1ab4d2c903e44f3fc07a511e (diff)
parentb8b605ef138bb13ce7c9b9800a2e8029007176a5 (diff)
Merge branch 'users/lhenriques/SLE15-SP1/for-next' into SLE15-SP1
Pull ceph fixes from Luis Henriques
-rw-r--r--patches.suse/ceph-cleanup-splice_dentry.patch158
-rw-r--r--patches.suse/ceph-don-t-encode-inode-pathes-into-reconnect-message.patch177
-rw-r--r--patches.suse/ceph-don-t-request-excl-caps-when-mount-is-readonly.patch36
-rw-r--r--patches.suse/ceph-remove-redundant-assignment.patch30
-rw-r--r--patches.suse/ceph-skip-updating-wanted-caps-if-caps-are-already-issued.patch67
-rw-r--r--patches.suse/ceph-update-wanted-caps-after-resuming-stale-session.patch216
-rw-r--r--patches.suse/libceph-drop-last_piece-logic-from-write_partial_message_data.patch55
-rw-r--r--patches.suse/libceph-switch-more-to-bool-in-ceph_tcp_sendmsg.patch26
-rw-r--r--patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch104
-rw-r--r--patches.suse/libceph-use-sock_no_sendpage-as-a-fallback-in-ceph_tcp_sendpage.patch72
-rw-r--r--series.conf10
11 files changed, 951 insertions, 0 deletions
diff --git a/patches.suse/ceph-cleanup-splice_dentry.patch b/patches.suse/ceph-cleanup-splice_dentry.patch
new file mode 100644
index 0000000000..22d78d5620
--- /dev/null
+++ b/patches.suse/ceph-cleanup-splice_dentry.patch
@@ -0,0 +1,158 @@
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Thu, 25 Oct 2018 17:30:30 +0800
+Subject: ceph: cleanup splice_dentry()
+Git-commit: 2bf996ac48326645ffe5985edfca307838f8eafe
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+splice_dentry() may drop the original dentry and return a different
+dentry. It relies on its caller to update the pointer that points
+to the dropped dentry. This is error-prone.
+
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ fs/ceph/inode.c | 60 ++++++++++++++++++++++----------------------------------
+ 1 file changed, 24 insertions(+), 36 deletions(-)
+
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -1097,8 +1097,9 @@ out_unlock:
+ * splice a dentry to an inode.
+ * caller must hold directory i_mutex for this to be safe.
+ */
+-static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
++static int splice_dentry(struct dentry **pdn, struct inode *in)
+ {
++ struct dentry *dn = *pdn;
+ struct dentry *realdn;
+
+ BUG_ON(d_inode(dn));
+@@ -1131,28 +1132,23 @@ static struct dentry *splice_dentry(stru
+ if (IS_ERR(realdn)) {
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
+- dn = realdn;
+- /*
+- * Caller should release 'dn' in the case of error.
+- * If 'req->r_dentry' is passed to this function,
+- * caller should leave 'req->r_dentry' untouched.
+- */
+- goto out;
+- } else if (realdn) {
++ return PTR_ERR(realdn);
++ }
++
++ if (realdn) {
+ dout("dn %p (%d) spliced with %p (%d) "
+ "inode %p ino %llx.%llx\n",
+ dn, d_count(dn),
+ realdn, d_count(realdn),
+ d_inode(realdn), ceph_vinop(d_inode(realdn)));
+ dput(dn);
+- dn = realdn;
++ *pdn = realdn;
+ } else {
+ BUG_ON(!ceph_dentry(dn));
+ dout("dn %p attached to %p ino %llx.%llx\n",
+ dn, d_inode(dn), ceph_vinop(d_inode(dn)));
+ }
+-out:
+- return dn;
++ return 0;
+ }
+
+ /*
+@@ -1339,7 +1335,12 @@ retry_lookup:
+ dout("dn %p gets new offset %lld\n", req->r_old_dentry,
+ ceph_dentry(req->r_old_dentry)->offset);
+
+- dn = req->r_old_dentry; /* use old_dentry */
++ /* swap r_dentry and r_old_dentry in case that
++ * splice_dentry() gets called later. This is safe
++ * because no other place will use them */
++ req->r_dentry = req->r_old_dentry;
++ req->r_old_dentry = dn;
++ dn = req->r_dentry;
+ }
+
+ /* null dentry? */
+@@ -1364,12 +1365,10 @@ retry_lookup:
+ if (d_really_is_negative(dn)) {
+ ceph_dir_clear_ordered(dir);
+ ihold(in);
+- dn = splice_dentry(dn, in);
+- if (IS_ERR(dn)) {
+- err = PTR_ERR(dn);
++ err = splice_dentry(&req->r_dentry, in);
++ if (err < 0)
+ goto done;
+- }
+- req->r_dentry = dn; /* may have spliced */
++ dn = req->r_dentry; /* may have spliced */
+ } else if (d_really_is_positive(dn) && d_inode(dn) != in) {
+ dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
+ dn, d_inode(dn), ceph_vinop(d_inode(dn)),
+@@ -1389,22 +1388,18 @@ retry_lookup:
+ } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+ req->r_op == CEPH_MDS_OP_MKSNAP) &&
+ !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
+- struct dentry *dn = req->r_dentry;
+ struct inode *dir = req->r_parent;
+
+ /* fill out a snapdir LOOKUPSNAP dentry */
+- BUG_ON(!dn);
+ BUG_ON(!dir);
+ BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
+- dout(" linking snapped dir %p to dn %p\n", in, dn);
++ BUG_ON(!req->r_dentry);
++ dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry);
+ ceph_dir_clear_ordered(dir);
+ ihold(in);
+- dn = splice_dentry(dn, in);
+- if (IS_ERR(dn)) {
+- err = PTR_ERR(dn);
++ err = splice_dentry(&req->r_dentry, in);
++ if (err < 0)
+ goto done;
+- }
+- req->r_dentry = dn; /* may have spliced */
+ } else if (rinfo->head->is_dentry) {
+ struct ceph_vino *ptvino = NULL;
+
+@@ -1668,8 +1663,6 @@ retry_lookup:
+ }
+
+ if (d_really_is_negative(dn)) {
+- struct dentry *realdn;
+-
+ if (ceph_security_xattr_deadlock(in)) {
+ dout(" skip splicing dn %p to inode %p"
+ " (security xattr deadlock)\n", dn, in);
+@@ -1678,13 +1671,9 @@ retry_lookup:
+ goto next_item;
+ }
+
+- realdn = splice_dentry(dn, in);
+- if (IS_ERR(realdn)) {
+- err = PTR_ERR(realdn);
+- d_drop(dn);
++ err = splice_dentry(&dn, in);
++ if (err < 0)
+ goto next_item;
+- }
+- dn = realdn;
+ }
+
+ ceph_dentry(dn)->offset = rde->offset;
+@@ -1700,8 +1689,7 @@ retry_lookup:
+ err = ret;
+ }
+ next_item:
+- if (dn)
+- dput(dn);
++ dput(dn);
+ }
+ out:
+ if (err == 0 && skipped == 0) {
diff --git a/patches.suse/ceph-don-t-encode-inode-pathes-into-reconnect-message.patch b/patches.suse/ceph-don-t-encode-inode-pathes-into-reconnect-message.patch
new file mode 100644
index 0000000000..b6976cfc71
--- /dev/null
+++ b/patches.suse/ceph-don-t-encode-inode-pathes-into-reconnect-message.patch
@@ -0,0 +1,177 @@
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Thu, 13 Dec 2018 16:34:11 +0800
+Subject: ceph: don't encode inode pathes into reconnect message
+Git-commit: 5ccedf1ccd710ba32f36986b49eeb764e53e7ef1
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+The mds hasn't used inode paths since inode backtraces were introduced.
+
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ fs/ceph/mds_client.c | 96 ++++++++++++++++++++++++++-------------------------
+ 1 file changed, 50 insertions(+), 46 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -2958,11 +2958,8 @@ static int encode_caps_cb(struct inode *
+ struct ceph_inode_info *ci = cap->ci;
+ struct ceph_reconnect_state *recon_state = arg;
+ struct ceph_pagelist *pagelist = recon_state->pagelist;
+- char *path;
+- int pathlen, err;
+- u64 pathbase;
++ int err;
+ u64 snap_follows;
+- struct dentry *dentry;
+
+ dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
+ inode, ceph_vinop(inode), cap, cap->cap_id,
+@@ -2971,19 +2968,6 @@ static int encode_caps_cb(struct inode *
+ if (err)
+ return err;
+
+- dentry = d_find_alias(inode);
+- if (dentry) {
+- path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
+- if (IS_ERR(path)) {
+- err = PTR_ERR(path);
+- goto out_dput;
+- }
+- } else {
+- path = NULL;
+- pathlen = 0;
+- pathbase = 0;
+- }
+-
+ spin_lock(&ci->i_ceph_lock);
+ cap->seq = 0; /* reset cap seq */
+ cap->issue_seq = 0; /* and issue_seq */
+@@ -2995,7 +2979,7 @@ static int encode_caps_cb(struct inode *
+ rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+ rec.v2.issued = cpu_to_le32(cap->issued);
+ rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+- rec.v2.pathbase = cpu_to_le64(pathbase);
++ rec.v2.pathbase = 0;
+ rec.v2.flock_len = (__force __le32)
+ ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
+ } else {
+@@ -3006,7 +2990,7 @@ static int encode_caps_cb(struct inode *
+ ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime);
+ ceph_encode_timespec(&rec.v1.atime, &inode->i_atime);
+ rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+- rec.v1.pathbase = cpu_to_le64(pathbase);
++ rec.v1.pathbase = 0;
+ }
+
+ if (list_empty(&ci->i_cap_snaps)) {
+@@ -3037,7 +3021,7 @@ encode_again:
+ sizeof(struct ceph_filelock), GFP_NOFS);
+ if (!flocks) {
+ err = -ENOMEM;
+- goto out_free;
++ goto out_err;
+ }
+ err = ceph_encode_locks_to_buffer(inode, flocks,
+ num_fcntl_locks,
+@@ -3047,7 +3031,7 @@ encode_again:
+ flocks = NULL;
+ if (err == -ENOSPC)
+ goto encode_again;
+- goto out_free;
++ goto out_err;
+ }
+ } else {
+ kfree(flocks);
+@@ -3067,44 +3051,64 @@ encode_again:
+ sizeof(struct ceph_filelock);
+ rec.v2.flock_len = cpu_to_le32(struct_len);
+
+- struct_len += sizeof(rec.v2);
+- struct_len += sizeof(u32) + pathlen;
++ struct_len += sizeof(u32) + sizeof(rec.v2);
+
+ if (struct_v >= 2)
+ struct_len += sizeof(u64); /* snap_follows */
+
+ total_len += struct_len;
+ err = ceph_pagelist_reserve(pagelist, total_len);
+-
+- if (!err) {
+- if (recon_state->msg_version >= 3) {
+- ceph_pagelist_encode_8(pagelist, struct_v);
+- ceph_pagelist_encode_8(pagelist, 1);
+- ceph_pagelist_encode_32(pagelist, struct_len);
+- }
+- ceph_pagelist_encode_string(pagelist, path, pathlen);
+- ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
+- ceph_locks_to_pagelist(flocks, pagelist,
+- num_fcntl_locks,
+- num_flock_locks);
+- if (struct_v >= 2)
+- ceph_pagelist_encode_64(pagelist, snap_follows);
++ if (err) {
++ kfree(flocks);
++ goto out_err;
+ }
++
++ if (recon_state->msg_version >= 3) {
++ ceph_pagelist_encode_8(pagelist, struct_v);
++ ceph_pagelist_encode_8(pagelist, 1);
++ ceph_pagelist_encode_32(pagelist, struct_len);
++ }
++ ceph_pagelist_encode_string(pagelist, NULL, 0);
++ ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
++ ceph_locks_to_pagelist(flocks, pagelist,
++ num_fcntl_locks, num_flock_locks);
++ if (struct_v >= 2)
++ ceph_pagelist_encode_64(pagelist, snap_follows);
++
+ kfree(flocks);
+ } else {
+- size_t size = sizeof(u32) + pathlen + sizeof(rec.v1);
+- err = ceph_pagelist_reserve(pagelist, size);
+- if (!err) {
+- ceph_pagelist_encode_string(pagelist, path, pathlen);
+- ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
++ u64 pathbase = 0;
++ int pathlen = 0;
++ char *path = NULL;
++ struct dentry *dentry;
++
++ dentry = d_find_alias(inode);
++ if (dentry) {
++ path = ceph_mdsc_build_path(dentry,
++ &pathlen, &pathbase, 0);
++ dput(dentry);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ goto out_err;
++ }
++ rec.v1.pathbase = cpu_to_le64(pathbase);
+ }
++
++ err = ceph_pagelist_reserve(pagelist,
++ pathlen + sizeof(u32) + sizeof(rec.v1));
++ if (err) {
++ kfree(path);
++ goto out_err;
++ }
++
++ ceph_pagelist_encode_string(pagelist, path, pathlen);
++ ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
++
++ kfree(path);
+ }
+
+ recon_state->nr_caps++;
+-out_free:
+- kfree(path);
+-out_dput:
+- dput(dentry);
++out_err:
+ return err;
+ }
+
diff --git a/patches.suse/ceph-don-t-request-excl-caps-when-mount-is-readonly.patch b/patches.suse/ceph-don-t-request-excl-caps-when-mount-is-readonly.patch
new file mode 100644
index 0000000000..582165a2f5
--- /dev/null
+++ b/patches.suse/ceph-don-t-request-excl-caps-when-mount-is-readonly.patch
@@ -0,0 +1,36 @@
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Wed, 5 Dec 2018 11:29:35 +0800
+Subject: ceph: don't request excl caps when mount is readonly
+Git-commit: 8a2ac3a8e9c04018e2bbff9d6ff038507e443e75
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ fs/ceph/caps.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -1852,14 +1852,17 @@ retry_locked:
+ retain |= CEPH_CAP_ANY; /* be greedy */
+ } else if (S_ISDIR(inode->i_mode) &&
+ (issued & CEPH_CAP_FILE_SHARED) &&
+- __ceph_dir_is_complete(ci)) {
++ __ceph_dir_is_complete(ci)) {
+ /*
+ * If a directory is complete, we want to keep
+ * the exclusive cap. So that MDS does not end up
+ * revoking the shared cap on every create/unlink
+ * operation.
+ */
+- want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
++ if (IS_RDONLY(inode))
++ want = CEPH_CAP_ANY_SHARED;
++ else
++ want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+ retain |= want;
+ } else {
+
diff --git a/patches.suse/ceph-remove-redundant-assignment.patch b/patches.suse/ceph-remove-redundant-assignment.patch
new file mode 100644
index 0000000000..15adab7d60
--- /dev/null
+++ b/patches.suse/ceph-remove-redundant-assignment.patch
@@ -0,0 +1,30 @@
+From: Chengguang Xu <cgxu519@gmx.com>
+Date: Thu, 15 Nov 2018 22:27:01 +0800
+Subject: ceph: remove redundant assignment
+Git-commit: 0cab9f33d9c33f5ea94e5438fbc91bc8b7f48057
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+There is a redundant assignment of variable i in
+ceph_mdsmap_get_random_mds(), so just remove it.
+
+Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ fs/ceph/mdsmap.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
+index 44e53abeb32a..1a2c5d390f7f 100644
+--- a/fs/ceph/mdsmap.c
++++ b/fs/ceph/mdsmap.c
+@@ -35,7 +35,6 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+
+ /* pick */
+ n = prandom_u32() % n;
+- i = 0;
+ for (i = 0; n > 0; i++, n--)
+ while (m->m_info[i].state <= 0)
+ i++;
+
diff --git a/patches.suse/ceph-skip-updating-wanted-caps-if-caps-are-already-issued.patch b/patches.suse/ceph-skip-updating-wanted-caps-if-caps-are-already-issued.patch
new file mode 100644
index 0000000000..385211fedf
--- /dev/null
+++ b/patches.suse/ceph-skip-updating-wanted-caps-if-caps-are-already-issued.patch
@@ -0,0 +1,67 @@
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Thu, 22 Nov 2018 15:26:01 +0800
+Subject: ceph: skip updating 'wanted' caps if caps are already issued
+Git-commit: fdac94fab7995ebc52ff9c5b6247133c67a7564a
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+When reading a cached inode that already has Fscr caps, this can avoid
+two cap messages (one updates 'wanted' caps, one clears 'wanted' caps).
+
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ fs/ceph/caps.c | 27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -1970,8 +1970,7 @@ retry_locked:
+ goto ack;
+
+ /* things we might delay */
+- if ((cap->issued & ~retain) == 0 &&
+- cap->mds_wanted == want)
++ if ((cap->issued & ~retain) == 0)
+ continue; /* nope, all good */
+
+ if (no_delay)
+@@ -3047,7 +3046,8 @@ static void handle_cap_grant(struct inod
+ int used, wanted, dirty;
+ u64 size = le64_to_cpu(grant->size);
+ u64 max_size = le64_to_cpu(grant->max_size);
+- int check_caps = 0;
++ unsigned char check_caps = 0;
++ bool was_stale = cap->cap_gen < session->s_cap_gen;
+ bool wake = false;
+ bool writeback = false;
+ bool queue_trunc = false;
+@@ -3199,13 +3199,20 @@ static void handle_cap_grant(struct inod
+ ceph_cap_string(wanted),
+ ceph_cap_string(used),
+ ceph_cap_string(dirty));
+- if (wanted != le32_to_cpu(grant->wanted)) {
+- dout("mds wanted %s -> %s\n",
+- ceph_cap_string(le32_to_cpu(grant->wanted)),
+- ceph_cap_string(wanted));
+- /* imported cap may not have correct mds_wanted */
+- if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
+- check_caps = 1;
++
++ if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) &&
++ (wanted & ~(cap->mds_wanted | newcaps))) {
++ /*
++ * If mds is importing cap, prior cap messages that update
++ * 'wanted' may get dropped by mds (migrate seq mismatch).
++ *
++ * We don't send cap message to update 'wanted' if what we
++ * want are already issued. If mds revokes caps, cap message
++ * that releases caps also tells mds what we want. But if
++ * caps got revoked by mds forcedly (session stale). We may
++ * haven't told mds what we want.
++ */
++ check_caps = 1;
+ }
+
+ /* revocation, grant, or no-op? */
diff --git a/patches.suse/ceph-update-wanted-caps-after-resuming-stale-session.patch b/patches.suse/ceph-update-wanted-caps-after-resuming-stale-session.patch
new file mode 100644
index 0000000000..6ab747b0ac
--- /dev/null
+++ b/patches.suse/ceph-update-wanted-caps-after-resuming-stale-session.patch
@@ -0,0 +1,216 @@
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Mon, 10 Dec 2018 16:35:09 +0800
+Subject: ceph: update wanted caps after resuming stale session
+Git-commit: d2f8bb27c87945ab696bdaea25b0465dee94fb6d
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+The mds contains an optimization: it does not re-issue stale caps if
+the client does not want any caps.
+
+A special case of the optimization is that the client wants some caps,
+but skipped updating 'wanted'. In this case, the client needs to update
+'wanted' when the stale session gets renewed.
+
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ fs/ceph/caps.c | 40 +++++++++++++++++++++++-----------------
+ fs/ceph/mds_client.c | 35 ++++++++++++++++++++++++++---------
+ fs/ceph/mds_client.h | 14 ++++++++------
+ 3 files changed, 57 insertions(+), 32 deletions(-)
+
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -654,6 +654,9 @@ void ceph_add_cap(struct inode *inode,
+ session->s_nr_caps++;
+ spin_unlock(&session->s_cap_lock);
+ } else {
++ if (cap->cap_gen < session->s_cap_gen)
++ cap->issued = cap->implemented = CEPH_CAP_PIN;
++
+ /*
+ * auth mds of the inode changed. we received the cap export
+ * message, but still haven't received the cap import message.
+@@ -3062,21 +3065,6 @@ static void handle_cap_grant(struct inod
+
+
+ /*
+- * auth mds of the inode changed. we received the cap export message,
+- * but still haven't received the cap import message. handle_cap_export
+- * updated the new auth MDS' cap.
+- *
+- * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
+- * that was sent before the cap import message. So don't remove caps.
+- */
+- if (ceph_seq_cmp(seq, cap->seq) <= 0) {
+- WARN_ON(cap != ci->i_auth_cap);
+- WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
+- seq = cap->seq;
+- newcaps |= cap->issued;
+- }
+-
+- /*
+ * If CACHE is being revoked, and we have no dirty buffers,
+ * try to invalidate (once). (If there are dirty buffers, we
+ * will invalidate _after_ writeback.)
+@@ -3095,6 +3083,24 @@ static void handle_cap_grant(struct inod
+ }
+ }
+
++ if (was_stale)
++ cap->issued = cap->implemented = CEPH_CAP_PIN;
++
++ /*
++ * auth mds of the inode changed. we received the cap export message,
++ * but still haven't received the cap import message. handle_cap_export
++ * updated the new auth MDS' cap.
++ *
++ * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
++ * that was sent before the cap import message. So don't remove caps.
++ */
++ if (ceph_seq_cmp(seq, cap->seq) <= 0) {
++ WARN_ON(cap != ci->i_auth_cap);
++ WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
++ seq = cap->seq;
++ newcaps |= cap->issued;
++ }
++
+ /* side effects now are allowed */
+ cap->cap_gen = session->s_cap_gen;
+ cap->seq = seq;
+@@ -3545,9 +3551,9 @@ retry:
+ goto out_unlock;
+
+ if (target < 0) {
+- __ceph_remove_cap(cap, false);
+- if (!ci->i_auth_cap)
++ if (cap->mds_wanted | cap->issued)
+ ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
++ __ceph_remove_cap(cap, false);
+ goto out_unlock;
+ }
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -1231,13 +1231,13 @@ static int remove_session_caps_cb(struct
+ dout("removing cap %p, ci is %p, inode is %p\n",
+ cap, ci, &ci->vfs_inode);
+ spin_lock(&ci->i_ceph_lock);
++ if (cap->mds_wanted | cap->issued)
++ ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
+ __ceph_remove_cap(cap, false);
+ if (!ci->i_auth_cap) {
+ struct ceph_cap_flush *cf;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+- ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
+-
+ if (ci->i_wrbuffer_ref > 0 &&
+ READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ invalidate = true;
+@@ -1354,6 +1354,12 @@ static void remove_session_caps(struct c
+ dispose_cap_releases(session->s_mdsc, &dispose);
+ }
+
++enum {
++ RECONNECT,
++ RENEWCAPS,
++ FORCE_RO,
++};
++
+ /*
+ * wake up any threads waiting on this session's caps. if the cap is
+ * old (didn't get renewed on the client reconnect), remove it now.
+@@ -1364,23 +1370,34 @@ static int wake_up_session_cb(struct ino
+ void *arg)
+ {
+ struct ceph_inode_info *ci = ceph_inode(inode);
++ unsigned long ev = (unsigned long)arg;
+
+- if (arg) {
++ if (ev == RECONNECT) {
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_wanted_max_size = 0;
+ ci->i_requested_max_size = 0;
+ spin_unlock(&ci->i_ceph_lock);
++ } else if (ev == RENEWCAPS) {
++ if (cap->cap_gen < cap->session->s_cap_gen) {
++ /* mds did not re-issue stale cap */
++ spin_lock(&ci->i_ceph_lock);
++ cap->issued = cap->implemented = CEPH_CAP_PIN;
++ /* make sure mds knows what we want */
++ if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
++ ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
++ spin_unlock(&ci->i_ceph_lock);
++ }
++ } else if (ev == FORCE_RO) {
+ }
+ wake_up_all(&ci->i_cap_wq);
+ return 0;
+ }
+
+-static void wake_up_session_caps(struct ceph_mds_session *session,
+- int reconnect)
++static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
+ {
+ dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
+ iterate_session_caps(session, wake_up_session_cb,
+- (void *)(unsigned long)reconnect);
++ (void *)(unsigned long)ev);
+ }
+
+ /*
+@@ -1465,7 +1482,7 @@ static void renewed_caps(struct ceph_mds
+ spin_unlock(&session->s_cap_lock);
+
+ if (wake)
+- wake_up_session_caps(session, 0);
++ wake_up_session_caps(session, RENEWCAPS);
+ }
+
+ /*
+@@ -2845,7 +2862,7 @@ static void handle_session(struct ceph_m
+ spin_lock(&session->s_cap_lock);
+ session->s_readonly = true;
+ spin_unlock(&session->s_cap_lock);
+- wake_up_session_caps(session, 0);
++ wake_up_session_caps(session, FORCE_RO);
+ break;
+
+ case CEPH_SESSION_REJECT:
+@@ -3336,7 +3353,7 @@ static void check_new_map(struct ceph_md
+ pr_info("mds%d recovery completed\n", s->s_mds);
+ kick_requests(mdsc, i);
+ ceph_kick_flushing_caps(mdsc, s);
+- wake_up_session_caps(s, 1);
++ wake_up_session_caps(s, RECONNECT);
+ }
+ }
+
+--- a/fs/ceph/mds_client.h
++++ b/fs/ceph/mds_client.h
+@@ -16,14 +16,16 @@
+ #include <linux/ceph/auth.h>
+
+ /* The first 8 bits are reserved for old ceph releases */
+-#define CEPHFS_FEATURE_MIMIC 8
++#define CEPHFS_FEATURE_MIMIC 8
++#define CEPHFS_FEATURE_REPLY_ENCODING 9
++#define CEPHFS_FEATURE_RECLAIM_CLIENT 10
++#define CEPHFS_FEATURE_LAZY_CAP_WANTED 11
+
+-#define CEPHFS_FEATURES_ALL { \
+- 0, 1, 2, 3, 4, 5, 6, 7, \
+- CEPHFS_FEATURE_MIMIC, \
++#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
++ 0, 1, 2, 3, 4, 5, 6, 7, \
++ CEPHFS_FEATURE_MIMIC, \
++ CEPHFS_FEATURE_LAZY_CAP_WANTED, \
+ }
+-
+-#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL
+ #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+
+
diff --git a/patches.suse/libceph-drop-last_piece-logic-from-write_partial_message_data.patch b/patches.suse/libceph-drop-last_piece-logic-from-write_partial_message_data.patch
new file mode 100644
index 0000000000..3156d78acd
--- /dev/null
+++ b/patches.suse/libceph-drop-last_piece-logic-from-write_partial_message_data.patch
@@ -0,0 +1,55 @@
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Wed, 14 Nov 2018 12:24:01 +0100
+Subject: libceph: drop last_piece logic from write_partial_message_data()
+Git-commit: 1f6b821aef78e3d79e8d598ae59fc7e23fb6c563
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+last_piece is for the last piece in the current data item, not in the
+entire data payload of the message. This is harmful for messages with
+multiple data items. On top of that, we don't need to signal the end
+of a data payload either because it is always followed by a footer.
+
+We used to signal "more" unconditionally, until commit fe38a2b67bc6
+("libceph: start defining message data cursor"). Part of a large
+series, it introduced cursor->last_piece and also mistakenly inverted
+the hint by passing last_piece for "more". This was corrected with
+commit c2cfa1940097 ("libceph: Fix ceph_tcp_sendpage()'s more boolean
+usage").
+
+As it is, last_piece is not helping at all: because Nagle algorithm is
+disabled, for a simple message with two 512-byte data items we end up
+emitting three packets: front + first data item, second data item and
+footer. Go back to the original pre-fe38a2b67bc6 behavior -- a single
+packet in most cases.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ net/ceph/messenger.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1611,7 +1611,6 @@ static int write_partial_message_data(st
+ struct page *page;
+ size_t page_offset;
+ size_t length;
+- bool last_piece;
+ int ret;
+
+ if (!cursor->resid) {
+@@ -1619,10 +1618,9 @@ static int write_partial_message_data(st
+ continue;
+ }
+
+- page = ceph_msg_data_next(cursor, &page_offset, &length,
+- &last_piece);
+- ret = ceph_tcp_sendpage(con->sock, page, page_offset,
+- length, !last_piece);
++ page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
++ ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
++ true);
+ if (ret <= 0) {
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
diff --git a/patches.suse/libceph-switch-more-to-bool-in-ceph_tcp_sendmsg.patch b/patches.suse/libceph-switch-more-to-bool-in-ceph_tcp_sendmsg.patch
new file mode 100644
index 0000000000..c63078d54c
--- /dev/null
+++ b/patches.suse/libceph-switch-more-to-bool-in-ceph_tcp_sendmsg.patch
@@ -0,0 +1,26 @@
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Wed, 21 Nov 2018 18:56:40 +0100
+Subject: libceph: switch more to bool in ceph_tcp_sendmsg()
+Git-commit: 87349cdad963163b55cf7d327f5d47a647339838
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+Unlike in ceph_tcp_sendpage(), it's a bool.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ net/ceph/messenger.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -556,7 +556,7 @@ static int ceph_tcp_recvpage(struct sock
+ * shortly.
+ */
+ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
+- size_t kvlen, size_t len, int more)
++ size_t kvlen, size_t len, bool more)
+ {
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
diff --git a/patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch b/patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch
new file mode 100644
index 0000000000..bf54fc5551
--- /dev/null
+++ b/patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch
@@ -0,0 +1,104 @@
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 20 Nov 2018 15:44:00 +0100
+Subject: libceph: use MSG_SENDPAGE_NOTLAST with ceph_tcp_sendpage()
+Git-commit: 433b0a12953bc1dfcb52febb186136395a65aad0
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+Prevent do_tcp_sendpages() from calling tcp_push() (at least) once per
+page. Instead, arrange for tcp_push() to be called (at least) once per
+data payload. This results in more MSS-sized packets and fewer packets
+overall (5-10% reduction in my tests with typical OSD request sizes).
+See commits 2f5338442425 ("tcp: allow splice() to build full TSO
+packets"), 35f9c09fe9c7 ("tcp: tcp_sendpages() should call tcp_push()
+once") and ae62ca7b0321 ("tcp: fix MSG_SENDPAGE_NOTLAST logic") for
+details.
+
+Here is an example of a packet size histogram for 128K OSD requests
+(MSS = 1448, top 5):
+
+Before:
+
+ SIZE COUNT
+ 1448 777700
+ 952 127915
+ 1200 39238
+ 1219 9806
+ 21 5675
+
+After:
+
+ SIZE COUNT
+ 1448 897280
+ 21 6201
+ 1019 2797
+ 643 2739
+ 376 2479
+
+We could do slightly better by explicitly corking the socket but it's
+not clear it's worth it.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ net/ceph/messenger.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -572,12 +572,15 @@ static int ceph_tcp_sendmsg(struct socke
+ return r;
+ }
+
++/*
++ * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
++ */
+ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+- int offset, size_t size, bool more)
++ int offset, size_t size, int more)
+ {
+ ssize_t (*sendpage)(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags);
+- int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : 0);
++ int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
+ int ret;
+
+ /*
+@@ -1571,6 +1574,7 @@ static int write_partial_message_data(st
+ struct ceph_msg *msg = con->out_msg;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
++ int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ u32 crc;
+
+ dout("%s %p msg %p\n", __func__, con, msg);
+@@ -1599,8 +1603,10 @@ static int write_partial_message_data(st
+ }
+
+ page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
++ if (length == cursor->total_resid)
++ more = MSG_MORE;
+ ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
+- true);
++ more);
+ if (ret <= 0) {
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
+@@ -1630,13 +1636,16 @@ static int write_partial_message_data(st
+ */
+ static int write_partial_skip(struct ceph_connection *con)
+ {
++ int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ int ret;
+
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
+ while (con->out_skip > 0) {
+ size_t size = min(con->out_skip, (int) PAGE_SIZE);
+
+- ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
++ if (size == con->out_skip)
++ more = MSG_MORE;
++ ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
+ if (ret <= 0)
+ goto out;
+ con->out_skip -= ret;
diff --git a/patches.suse/libceph-use-sock_no_sendpage-as-a-fallback-in-ceph_tcp_sendpage.patch b/patches.suse/libceph-use-sock_no_sendpage-as-a-fallback-in-ceph_tcp_sendpage.patch
new file mode 100644
index 0000000000..ff6924f1a5
--- /dev/null
+++ b/patches.suse/libceph-use-sock_no_sendpage-as-a-fallback-in-ceph_tcp_sendpage.patch
@@ -0,0 +1,72 @@
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 16 Nov 2018 11:58:19 +0100
+Subject: libceph: use sock_no_sendpage() as a fallback in ceph_tcp_sendpage()
+Git-commit: 3239eb5215ebdef593a79316c9dbbdf8849166ec
+Patch-mainline: v5.0-rc1
+References: bsc#1122215
+
+sock_no_sendpage() makes the code cleaner.
+
+Also, don't set MSG_EOR. sendpage doesn't act on MSG_EOR on its own,
+it just honors the setting from the preceding sendmsg call by looking
+at ->eor in tcp_skb_can_collapse_to().
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ net/ceph/messenger.c | 32 ++++++--------------------------
+ 1 file changed, 6 insertions(+), 26 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -572,24 +572,12 @@ static int ceph_tcp_sendmsg(struct socke
+ return r;
+ }
+
+-static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
+- int offset, size_t size, bool more)
+-{
+- int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
+- int ret;
+-
+- ret = kernel_sendpage(sock, page, offset, size, flags);
+- if (ret == -EAGAIN)
+- ret = 0;
+-
+- return ret;
+-}
+-
+ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, bool more)
+ {
+- struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+- struct bio_vec bvec;
++ ssize_t (*sendpage)(struct socket *sock, struct page *page,
++ int offset, size_t size, int flags);
++ int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : 0);
+ int ret;
+
+ /*
+@@ -601,19 +589,11 @@ static int ceph_tcp_sendpage(struct sock
+ * triggers one of hardened usercopy checks.
+ */
+ if (page_count(page) >= 1 && !PageSlab(page))
+- return __ceph_tcp_sendpage(sock, page, offset, size, more);
+-
+- bvec.bv_page = page;
+- bvec.bv_offset = offset;
+- bvec.bv_len = size;
+-
+- if (more)
+- msg.msg_flags |= MSG_MORE;
++ sendpage = sock->ops->sendpage;
+ else
+- msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
++ sendpage = sock_no_sendpage;
+
+- iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
+- ret = sock_sendmsg(sock, &msg);
++ ret = sendpage(sock, page, offset, size, flags);
+ if (ret == -EAGAIN)
+ ret = 0;
+
diff --git a/series.conf b/series.conf
index c242d4469d..8a449e850c 100644
--- a/series.conf
+++ b/series.conf
@@ -42492,7 +42492,17 @@
patches.drivers/ALSA-hda-realtek-Enable-the-headset-mic-auto-detecti.patch
patches.drivers/ALSA-hda-tegra-clear-pending-irq-handlers.patch
patches.suse/mm-speed-up-mremap-by-20x-on-large-regions.patch
+ patches.suse/ceph-cleanup-splice_dentry.patch
+ patches.suse/ceph-remove-redundant-assignment.patch
+ patches.suse/libceph-drop-last_piece-logic-from-write_partial_message_data.patch
+ patches.suse/libceph-use-sock_no_sendpage-as-a-fallback-in-ceph_tcp_sendpage.patch
+ patches.suse/libceph-use-msg_sendpage_notlast-with-ceph_tcp_sendpage.patch
+ patches.suse/libceph-switch-more-to-bool-in-ceph_tcp_sendmsg.patch
patches.fixes/ceph-don-t-update-importing-cap-s-mseq-when-handing-cap-export.patch
+ patches.suse/ceph-don-t-request-excl-caps-when-mount-is-readonly.patch
+ patches.suse/ceph-skip-updating-wanted-caps-if-caps-are-already-issued.patch
+ patches.suse/ceph-update-wanted-caps-after-resuming-stale-session.patch
+ patches.suse/ceph-don-t-encode-inode-pathes-into-reconnect-message.patch
patches.fixes/xfs-xfs_buf-drop-useless-LIST_HEAD.patch
patches.drivers/thermal-bcm2835-enable-hwmon-explicitly.patch
patches.fixes/0001-fbdev-fbmem-behave-better-with-small-rotated-display.patch