Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2019-08-16 16:51:45 +0100
committerFilipe Manana <fdmanana@suse.com>2019-08-16 16:51:45 +0100
commite44b16efbda74e0ba684021dc3645dbfe6106c87 (patch)
tree94da51a551839c44413b28a573401cb7bf9af94b
parentfc04aaf77625e365937c728afc3db574dcd8668f (diff)
Btrfs: fix incremental send failure after deduplication
(bsc#1145940).
-rw-r--r--patches.suse/btrfs-fix-incremental-send-failure-after-deduplicati.patch181
-rw-r--r--series.conf1
2 files changed, 182 insertions, 0 deletions
diff --git a/patches.suse/btrfs-fix-incremental-send-failure-after-deduplicati.patch b/patches.suse/btrfs-fix-incremental-send-failure-after-deduplicati.patch
new file mode 100644
index 0000000000..9cd2be6ead
--- /dev/null
+++ b/patches.suse/btrfs-fix-incremental-send-failure-after-deduplicati.patch
@@ -0,0 +1,181 @@
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 17 Jul 2019 13:23:39 +0100
+Git-commit: b4f9a1a87a48c255bb90d8a6c3d555a1abb88130
+Patch-mainline: 5.3-rc3
+Subject: [PATCH] Btrfs: fix incremental send failure after deduplication
+References: bsc#1145940
+
+When doing an incremental send operation we can fail if we previously did
+deduplication operations against a file that exists in both snapshots. In
+that case we will fail the send operation with -EIO and print a message
+to dmesg/syslog like the following:
+
+ BTRFS error (device sdc): Send: inconsistent snapshot, found updated \
+ extent for inode 257 without updated inode item, send root is 258, \
+ parent root is 257
+
+This requires that we deduplicate to the same file in both snapshots for
+the same amount of times on each snapshot. The issue happens because a
+deduplication only updates the iversion of an inode and does not update
+any other field of the inode, therefore if we deduplicate the file on
+each snapshot for the same amount of time, the inode will have the same
+iversion value (stored as the "sequence" field on the inode item) on both
+snapshots, therefore it will be seen as unchanged between in the send
+snapshot while there are new/updated/deleted extent items when comparing
+to the parent snapshot. This makes the send operation return -EIO and
+print an error message.
+
+Example reproducer:
+
+ $ mkfs.btrfs -f /dev/sdb
+ $ mount /dev/sdb /mnt
+
+ # Create our first file. The first half of the file has several 64Kb
+ # extents while the second half as a single 512Kb extent.
+ $ xfs_io -f -s -c "pwrite -S 0xb8 -b 64K 0 512K" /mnt/foo
+ $ xfs_io -c "pwrite -S 0xb8 512K 512K" /mnt/foo
+
+ # Create the base snapshot and the parent send stream from it.
+ $ btrfs subvolume snapshot -r /mnt /mnt/mysnap1
+ $ btrfs send -f /tmp/1.snap /mnt/mysnap1
+
+ # Create our second file, that has exactly the same data as the first
+ # file.
+ $ xfs_io -f -c "pwrite -S 0xb8 0 1M" /mnt/bar
+
+ # Create the second snapshot, used for the incremental send, before
+ # doing the file deduplication.
+ $ btrfs subvolume snapshot -r /mnt /mnt/mysnap2
+
+ # Now before creating the incremental send stream:
+ #
+ # 1) Deduplicate into a subrange of file foo in snapshot mysnap1. This
+ # will drop several extent items and add a new one, also updating
+ # the inode's iversion (sequence field in inode item) by 1, but not
+ # any other field of the inode;
+ #
+ # 2) Deduplicate into a different subrange of file foo in snapshot
+ # mysnap2. This will replace an extent item with a new one, also
+ # updating the inode's iversion by 1 but not any other field of the
+ # inode.
+ #
+ # After these two deduplication operations, the inode items, for file
+ # foo, are identical in both snapshots, but we have different extent
+ # items for this inode in both snapshots. We want to check this doesn't
+ # cause send to fail with an error or produce an incorrect stream.
+
+ $ xfs_io -r -c "dedupe /mnt/bar 0 0 512K" /mnt/mysnap1/foo
+ $ xfs_io -r -c "dedupe /mnt/bar 512K 512K 512K" /mnt/mysnap2/foo
+
+ # Create the incremental send stream.
+ $ btrfs send -p /mnt/mysnap1 -f /tmp/2.snap /mnt/mysnap2
+ ERROR: send ioctl failed with -5: Input/output error
+
+This issue started happening back in 2015 when deduplication was updated
+to not update the inode's ctime and mtime and update only the iversion.
+Back then we would hit a BUG_ON() in send, but later in 2016 send was
+updated to return -EIO and print the error message instead of doing the
+BUG_ON().
+
+A test case for fstests follows soon.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203933
+Fixes: 1c919a5e13702c ("btrfs: don't update mtime/ctime on deduped inodes")
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/send.c | 77 +++++++++++----------------------------------------------
+ 1 file changed, 15 insertions(+), 62 deletions(-)
+
+diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
+index bb1861ca7ddf..a3dd934ad293 100644
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -6250,68 +6250,21 @@ static int changed_extent(struct send_ctx *sctx,
+ {
+ int ret = 0;
+
+- if (sctx->cur_ino != sctx->cmp_key->objectid) {
+-
+- if (result == BTRFS_COMPARE_TREE_CHANGED) {
+- struct extent_buffer *leaf_l;
+- struct extent_buffer *leaf_r;
+- struct btrfs_file_extent_item *ei_l;
+- struct btrfs_file_extent_item *ei_r;
+-
+- leaf_l = sctx->left_path->nodes[0];
+- leaf_r = sctx->right_path->nodes[0];
+- ei_l = btrfs_item_ptr(leaf_l,
+- sctx->left_path->slots[0],
+- struct btrfs_file_extent_item);
+- ei_r = btrfs_item_ptr(leaf_r,
+- sctx->right_path->slots[0],
+- struct btrfs_file_extent_item);
+-
+- /*
+- * We may have found an extent item that has changed
+- * only its disk_bytenr field and the corresponding
+- * inode item was not updated. This case happens due to
+- * very specific timings during relocation when a leaf
+- * that contains file extent items is COWed while
+- * relocation is ongoing and its in the stage where it
+- * updates data pointers. So when this happens we can
+- * safely ignore it since we know it's the same extent,
+- * but just at different logical and physical locations
+- * (when an extent is fully replaced with a new one, we
+- * know the generation number must have changed too,
+- * since snapshot creation implies committing the current
+- * transaction, and the inode item must have been updated
+- * as well).
+- * This replacement of the disk_bytenr happens at
+- * relocation.c:replace_file_extents() through
+- * relocation.c:btrfs_reloc_cow_block().
+- */
+- if (btrfs_file_extent_generation(leaf_l, ei_l) ==
+- btrfs_file_extent_generation(leaf_r, ei_r) &&
+- btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
+- btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
+- btrfs_file_extent_compression(leaf_l, ei_l) ==
+- btrfs_file_extent_compression(leaf_r, ei_r) &&
+- btrfs_file_extent_encryption(leaf_l, ei_l) ==
+- btrfs_file_extent_encryption(leaf_r, ei_r) &&
+- btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
+- btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
+- btrfs_file_extent_type(leaf_l, ei_l) ==
+- btrfs_file_extent_type(leaf_r, ei_r) &&
+- btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
+- btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
+- btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
+- btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
+- btrfs_file_extent_offset(leaf_l, ei_l) ==
+- btrfs_file_extent_offset(leaf_r, ei_r) &&
+- btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
+- btrfs_file_extent_num_bytes(leaf_r, ei_r))
+- return 0;
+- }
+-
+- inconsistent_snapshot_error(sctx, result, "extent");
+- return -EIO;
+- }
++ /*
++ * We have found an extent item that changed without the inode item
++ * having changed. This can happen either after relocation (where the
++ * disk_bytenr of an extent item is replaced at
++ * relocation.c:replace_file_extents()) or after deduplication into a
++ * file in both the parent and send snapshots (where an extent item can
++ * get modified or replaced with a new one). Note that deduplication
++ * updates the inode item, but it only changes the iversion (sequence
++ * field in the inode item) of the inode, so if a file is deduplicated
++ * the same amount of times in both the parent and send snapshots, its
++ * iversion becames the same in both snapshots, whence the inode item is
++ * the same on both snapshots.
++ */
++ if (sctx->cur_ino != sctx->cmp_key->objectid)
++ return 0;
+
+ if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
+ if (result != BTRFS_COMPARE_TREE_DELETED)
+--
+2.16.4
+
diff --git a/series.conf b/series.conf
index a7428b7a58..4a59d37ab8 100644
--- a/series.conf
+++ b/series.conf
@@ -23339,6 +23339,7 @@
patches.drivers/ALSA-pcm-fix-lost-wakeup-event-scenarios-in-snd_pcm_.patch
patches.drivers/ALSA-usb-audio-Fix-gpf-in-snd_usb_pipe_sanity_check.patch
patches.drivers/ACPI-PM-Fix-regression-in-acpi_device_set_power.patch
+ patches.suse/btrfs-fix-incremental-send-failure-after-deduplicati.patch
patches.suse/btrfs-fix-race-leading-to-fs-corruption-after-transa.patch
patches.drivers/IB-mlx5-Fix-MR-registration-flow-to-use-UMR-properly.patch
patches.drivers/libata-zpodd-Fix-small-read-overflow-in-zpodd_get_me.patch