Home > GIT Browse
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Filipe Manana <fdmanana@suse.com> 2019-03-20 11:15:59 +0000
committer Filipe Manana <fdmanana@suse.com> 2019-03-20 11:15:59 +0000
commit 773794e7fad4a2acc6f8e48d2532793ab1295f1e (patch)
tree fab95f01b01b7db91661cb238193a950e2a969b7
parent e1d910066fd394492356f6de5fbc48dc69a04283 (diff)
Btrfs: fix fsync after succession of renames and unlink/rmdir
(bsc#1126488).
-rw-r--r-- patches.suse/btrfs-fix-fsync-after-succession-of-renames-and-unli.patch | 241
-rw-r--r-- series.conf | 1
2 files changed, 242 insertions, 0 deletions
diff --git a/patches.suse/btrfs-fix-fsync-after-succession-of-renames-and-unli.patch b/patches.suse/btrfs-fix-fsync-after-succession-of-renames-and-unli.patch
new file mode 100644
index 0000000000..defbb8b91f
--- /dev/null
+++ b/patches.suse/btrfs-fix-fsync-after-succession-of-renames-and-unli.patch
@@ -0,0 +1,241 @@
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 13 Feb 2019 12:14:09 +0000
+Patch-mainline: 5.1-rc1
+Git-commit: a3baaf0d786e22fc86295fda9c58ba0dee07599f
+References: bsc#1126488
+Subject: [PATCH] Btrfs: fix fsync after succession of renames and
+ unlink/rmdir
+
+After a succession of rename operations on different files and unlinking
+one of them, if we fsync one of the renamed files we can end up with a
+log that will either fail to replay at mount time or result in a filesystem
+that is in an inconsistent state. One example scenario:
+
+ $ mkfs.btrfs -f /dev/sdb
+ $ mount /dev/sdb /mnt
+
+ $ mkdir /mnt/testdir
+ $ touch /mnt/testdir/fname1
+ $ touch /mnt/testdir/fname2
+
+ $ sync
+
+ $ mv /mnt/testdir/fname1 /mnt/testdir/fname3
+ $ rm -f /mnt/testdir/fname2
+ $ ln /mnt/testdir/fname3 /mnt/testdir/fname2
+
+ $ touch /mnt/testdir/fname1
+ $ xfs_io -c "fsync" /mnt/testdir/fname1
+
+ <power failure>
+
+ $ mount /dev/sdb /mnt
+ $ umount /mnt
+ $ btrfs check /dev/sdb
+ [1/7] checking root items
+ [2/7] checking extents
+ [3/7] checking free space cache
+ [4/7] checking fs roots
+ root 5 inode 259 errors 2, no orphan item
+ ERROR: errors found in fs roots
+ Opening filesystem to check...
+ Checking filesystem on /dev/sdc
+ UUID: 20e4abb8-5a19-4492-8bb4-6084125c2d0d
+ found 393216 bytes used, error(s) found
+ total csum bytes: 0
+ total tree bytes: 131072
+ total fs tree bytes: 32768
+ total extent tree bytes: 16384
+ btree space waste bytes: 122986
+ file data blocks allocated: 262144
+ referenced 262144
+
+On a kernel without the first patch in this series, titled
+"[PATCH] Btrfs: fix fsync after succession of renames of different files",
+we get instead an error when mounting the filesystem due to failure of
+replaying the log:
+
+ $ mount /dev/sdb /mnt
+ mount: mount /dev/sdb on /mnt failed: File exists
+
+Fix this by logging the parent directory of an inode whenever we find an
+inode that no longer exists (was unlinked in the current transaction),
+during the procedure which finds inodes that have old names that collide
+with new names of other inodes.
+
+A test case for fstests follows soon.
+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/tree-log.c | 49 ++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 37 insertions(+), 12 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index ce394b750544..9ba022189666 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -40,6 +40,7 @@
+ #define LOG_INODE_ALL 0
+ #define LOG_INODE_EXISTS 1
+ #define LOG_OTHER_INODE 2
++#define LOG_OTHER_INODE_ALL 3
+
+ /*
+ * directory trouble cases
+@@ -4957,7 +4958,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+ const int slot,
+ const struct btrfs_key *key,
+ struct btrfs_inode *inode,
+- u64 *other_ino)
++ u64 *other_ino, u64 *other_parent)
+ {
+ int ret;
+ struct btrfs_path *search_path;
+@@ -5023,6 +5024,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+ if (di_key.objectid != key->objectid) {
+ ret = 1;
+ *other_ino = di_key.objectid;
++ *other_parent = parent;
+ } else {
+ ret = 0;
+ }
+@@ -5047,6 +5049,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+
+ struct btrfs_ino_list {
+ u64 ino;
++ u64 parent;
+ struct list_head list;
+ };
+
+@@ -5054,7 +5057,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_log_ctx *ctx,
+- u64 ino)
++ u64 ino, u64 parent)
+ {
+ struct btrfs_ino_list *ino_elem;
+ LIST_HEAD(inode_list);
+@@ -5064,6 +5067,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ if (!ino_elem)
+ return -ENOMEM;
+ ino_elem->ino = ino;
++ ino_elem->parent = parent;
+ list_add_tail(&ino_elem->list, &inode_list);
+
+ while (!list_empty(&inode_list)) {
+@@ -5074,6 +5078,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ ino_elem = list_first_entry(&inode_list, struct btrfs_ino_list,
+ list);
+ ino = ino_elem->ino;
++ parent = ino_elem->parent;
+ list_del(&ino_elem->list);
+ kfree(ino_elem);
+ if (ret)
+@@ -5087,13 +5092,25 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ /*
+ * If the other inode that had a conflicting dir entry was
+- * deleted in the current transaction, we don't need to do more
+- * work nor fallback to a transaction commit.
++ * deleted in the current transaction, we need to log its parent
++ * directory.
+ */
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+- if (ret == -ENOENT)
+- ret = 0;
++ if (ret == -ENOENT) {
++ key.objectid = parent;
++ inode = btrfs_iget(fs_info->sb, &key, root,
++ NULL);
++ if (IS_ERR(inode)) {
++ ret = PTR_ERR(inode);
++ } else {
++ ret = btrfs_log_inode(trans, root,
++ BTRFS_I(inode),
++ LOG_OTHER_INODE_ALL,
++ 0, LLONG_MAX, ctx);
++ iput(inode);
++ }
++ }
+ continue;
+ }
+ /*
+@@ -5123,6 +5140,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ u64 other_ino = 0;
++ u64 other_parent = 0;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+@@ -5144,7 +5162,8 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ }
+
+ ret = btrfs_check_ref_name_override(leaf, slot, &key,
+- BTRFS_I(inode), &other_ino);
++ BTRFS_I(inode), &other_ino,
++ &other_parent);
+ if (ret < 0)
+ break;
+ if (ret > 0) {
+@@ -5154,6 +5173,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ break;
+ }
+ ino_elem->ino = other_ino;
++ ino_elem->parent = other_parent;
+ list_add_tail(&ino_elem->list, &inode_list);
+ ret = 0;
+ }
+@@ -5206,7 +5226,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+ u64 logged_isize = 0;
+ bool need_log_inode_item = true;
+ bool xattrs_logged = false;
+- bool recursive_logging = (inode_only == LOG_OTHER_INODE);
++ bool recursive_logging = false;
+
+ path = btrfs_alloc_path();
+ if (!path)
+@@ -5252,8 +5272,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+ return ret;
+ }
+
+- if (inode_only == LOG_OTHER_INODE) {
+- inode_only = LOG_INODE_EXISTS;
++ if (inode_only == LOG_OTHER_INODE || inode_only == LOG_OTHER_INODE_ALL) {
++ recursive_logging = true;
++ if (inode_only == LOG_OTHER_INODE)
++ inode_only = LOG_INODE_EXISTS;
++ else
++ inode_only = LOG_INODE_ALL;
+ mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&inode->log_mutex);
+@@ -5351,10 +5375,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+ inode->generation == trans->transid &&
+ !recursive_logging) {
+ u64 other_ino = 0;
++ u64 other_parent = 0;
+
+ ret = btrfs_check_ref_name_override(path->nodes[0],
+ path->slots[0], &min_key, inode,
+- &other_ino);
++ &other_ino, &other_parent);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+@@ -5377,7 +5402,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+ ins_nr = 0;
+
+ err = log_conflicting_inodes(trans, root, path,
+- ctx, other_ino);
++ ctx, other_ino, other_parent);
+ if (err)
+ goto out_unlock;
+ btrfs_release_path(path);
+--
+2.19.0
+
diff --git a/series.conf b/series.conf
index 0ba9ccd433..4dc0ca1198 100644
--- a/series.conf
+++ b/series.conf
@@ -20815,6 +20815,7 @@
patches.suse/0004-btrfs-qgroup-Use-delayed-subtree-rescan-for-balance.patch
patches.suse/0005-btrfs-qgroup-Cleanup-old-subtree-swap-code.patch
patches.suse/btrfs-fix-fsync-after-succession-of-renames-of-diffe.patch
+ patches.suse/btrfs-fix-fsync-after-succession-of-renames-and-unli.patch
patches.suse/btrfs-ensure-that-a-dup-or-raid1-block-group-has-exactly-two-stripes.patch
patches.arch/powerpc-pseries-Perform-full-re-add-of-CPU-for-topol.patch
patches.suse/powerpc-64s-clear-on-stack-exception-marker-upon-exception-return.patch