Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichal Marek <mmarek@suse.com>2017-06-07 17:10:18 +0200
committerMichal Marek <mmarek@suse.com>2017-06-07 17:12:06 +0200
commit945d239f784012b419e11ffcca968b15875d6dca (patch)
tree9c9425a93172627991851b1749c8e08d16ab8465
parent7e26504e1f1d0ca1c3eb4d0f955dae7a8746f7c8 (diff)
Btrfs: rework delayed ref total_bytes_pinned accounting
(bsc#1040182).
-rw-r--r--patches.suse/0006-Btrfs-rework-delayed-ref-total_bytes_pinned-accounti.patch164
-rw-r--r--series.conf1
2 files changed, 165 insertions, 0 deletions
diff --git a/patches.suse/0006-Btrfs-rework-delayed-ref-total_bytes_pinned-accounti.patch b/patches.suse/0006-Btrfs-rework-delayed-ref-total_bytes_pinned-accounti.patch
new file mode 100644
index 0000000000..6a5b18ae3d
--- /dev/null
+++ b/patches.suse/0006-Btrfs-rework-delayed-ref-total_bytes_pinned-accounti.patch
@@ -0,0 +1,164 @@
+From d9e960e272013ff9d372ea51ce5b561738ea5f10 Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Tue, 6 Jun 2017 16:45:31 -0700
+Subject: [PATCH 6/7] Btrfs: rework delayed ref total_bytes_pinned accounting
+References: bsc#1040182
+Patch-mainline: Submitted, https://www.spinics.net/lists/linux-btrfs/msg66259.html
+
+The total_bytes_pinned counter is completely broken when accounting
+delayed refs:
+
+- If two drops for the same extent are merged, we will decrement
+ total_bytes_pinned twice but only increment it once.
+- If an add is merged into a drop or vice versa, we will decrement the
+ total_bytes_pinned counter but never increment it.
+- If multiple references to an extent are dropped, we will account it
+ multiple times, potentially vastly over-estimating the number of bytes
+ that will be freed by a commit and doing unnecessary work when we're
+ close to ENOSPC.
+
+The last issue is relatively minor, but the first two make the
+total_bytes_pinned counter leak or underflow very often. These
+accounting issues were introduced in b150a4f10d87 ("Btrfs: use a percpu
+to keep track of possibly pinned bytes"), but they were papered over by
+zeroing out the counter on every commit until d288db5dc011 ("Btrfs: fix
+race of using total_bytes_pinned").
+
+We need to make sure that an extent is accounted as pinned exactly once
+if and only if we will drop references to it when the transaction
+is committed. Ideally we would only add to total_bytes_pinned when the
+*last* reference is dropped, but this information isn't readily
+available for data extents. Again, this over-estimation can lead to
+extra commits when we're close to ENOSPC, but it's not as bad as before.
+
+The fix implemented here is to increment total_bytes_pinned when the
+total refmod count for an extent goes negative and decrement it if the
+refmod count goes back to non-negative or after we've run all of the
+delayed refs for that extent.
+
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Acked-by: Nikolay Borisov <nborisov@suse.com>
+---
+ fs/btrfs/extent-tree.c | 39 +++++++++++++++++++++++++++++++--------
+ 1 file changed, 31 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index bb3df5552040..cf11d90ceb3c 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2096,6 +2096,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 owner, u64 offset)
+ {
++ int old_ref_mod, new_ref_mod;
+ int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
+@@ -2107,13 +2108,17 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+ num_bytes,
+ parent, root_objectid, (int)owner,
+ BTRFS_ADD_DELAYED_REF, NULL,
+- NULL, NULL);
++ &old_ref_mod, &new_ref_mod);
+ } else {
+ ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ num_bytes, parent, root_objectid,
+ owner, offset, 0,
+- BTRFS_ADD_DELAYED_REF, NULL, NULL, NULL);
++ BTRFS_ADD_DELAYED_REF, NULL,
++ &old_ref_mod, &new_ref_mod);
+ }
++ if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
++ add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
++
+ return ret;
+ }
+
+@@ -2423,6 +2428,16 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
+ trace_run_delayed_ref_head(root->fs_info, node, head,
+ node->action);
+
++ if (head->total_ref_mod < 0) {
++ struct btrfs_block_group_cache *cache;
++
++ cache = btrfs_lookup_block_group(root->fs_info, node->bytenr);
++ ASSERT(cache);
++ percpu_counter_add(&cache->space_info->total_bytes_pinned,
++ -node->num_bytes);
++ btrfs_put_block_group(cache);
++ }
++
+ if (insert_reserved) {
+ btrfs_pin_extent(root, node->bytenr,
+ node->num_bytes, 1);
+@@ -6265,6 +6280,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
+ trace_btrfs_space_reservation(root->fs_info, "pinned",
+ cache->space_info->flags,
+ num_bytes, 1);
++ percpu_counter_add(&cache->space_info->total_bytes_pinned,
++ num_bytes);
+ set_extent_dirty(info->pinned_extents,
+ bytenr, bytenr + num_bytes - 1,
+ GFP_NOFS | __GFP_NOFAIL);
+@@ -7046,8 +7063,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+ goto out;
+ }
+ }
+- add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
+- root_objectid);
+ } else {
+ if (found_extent) {
+ BUG_ON(is_data && refs_to_drop !=
+@@ -7172,13 +7187,16 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ int ret;
+
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
++ int old_ref_mod, new_ref_mod;
++
+ ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+ buf->start, buf->len,
+ parent, root->root_key.objectid,
+ btrfs_header_level(buf),
+ BTRFS_DROP_DELAYED_REF, NULL,
+- NULL, NULL);
++ &old_ref_mod, &new_ref_mod);
+ BUG_ON(ret); /* -ENOMEM */
++ pin = old_ref_mod >= 0 && new_ref_mod < 0;
+ }
+
+ if (last_ref && btrfs_header_generation(buf) == trans->transid) {
+@@ -7226,6 +7244,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+ u64 owner, u64 offset)
+ {
++ int old_ref_mod, new_ref_mod;
+ int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
+@@ -7247,16 +7266,20 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ num_bytes,
+ parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL, NULL,
+- NULL);
++ BTRFS_DROP_DELAYED_REF, NULL,
++ &old_ref_mod, &new_ref_mod);
+ } else {
+ ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ num_bytes,
+ parent, root_objectid, owner,
+ offset, 0,
+ BTRFS_DROP_DELAYED_REF, NULL,
+- NULL, NULL);
++ &old_ref_mod, &new_ref_mod);
+ }
++
++ if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
++ add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
++
+ return ret;
+ }
+
+--
+2.7.4
+
diff --git a/series.conf b/series.conf
index 86b7169e6e..d1f6d60301 100644
--- a/series.conf
+++ b/series.conf
@@ -3803,6 +3803,7 @@
patches.suse/0003-Btrfs-update-total_bytes_pinned-when-pinning-down-ex.patch
patches.suse/0004-Btrfs-always-account-pinned-bytes-when-dropping-a-tr.patch
patches.suse/0005-Btrfs-return-old-and-new-total-ref-mods-when-adding-.patch
+ patches.suse/0006-Btrfs-rework-delayed-ref-total_bytes_pinned-accounti.patch
# SUSE-specific
patches.suse/btrfs-provide-super_operations-get_inode_dev