author     Olaf Hering <ohering@suse.de>  2018-11-06 18:14:33 +0100
committer  Olaf Hering <ohering@suse.de>  2018-11-06 18:19:57 +0100
commit     83954f2c78227d85a6586207412cd67cea052953
tree       b3f0cf76c5492b219ae207f6010cf8e33ee24196
parent     9dcd09e304ea057f4d7cf25c177b99636809b74e
parent     c2f4b613bcdee9baf84c249e879ecb99493b1b8d

Merge remote-tracking branch 'kerncvs/SLE12-SP3' into SLE12-SP3-AZURE  (rpm-4.4.162-4.19)
-rw-r--r--  arch/x86/include/asm/kexec.h | 2
-rw-r--r--  arch/x86/kernel/apic/vector.c | 7
-rw-r--r--  drivers/cdrom/cdrom.c | 2
-rw-r--r--  drivers/char/hw_random/core.c | 4
-rw-r--r--  drivers/char/tpm/xen-tpmfront.c | 2
-rw-r--r--  drivers/edac/thunderx_edac.c | 4
-rw-r--r--  drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c | 2
-rw-r--r--  drivers/gpu/drm/virtio/virtgpu_vq.c | 5
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 2
-rw-r--r--  drivers/pci/host/pci-hyperv.c | 4
-rw-r--r--  drivers/video/fbdev/Kconfig | 27
-rw-r--r--  fs/btrfs/backref.c | 2
-rw-r--r--  fs/btrfs/btrfs_inode.h | 27
-rw-r--r--  fs/btrfs/ctree.h | 65
-rw-r--r--  fs/btrfs/delayed-inode.c | 81
-rw-r--r--  fs/btrfs/delayed-ref.c | 71
-rw-r--r--  fs/btrfs/delayed-ref.h | 3
-rw-r--r--  fs/btrfs/disk-io.c | 22
-rw-r--r--  fs/btrfs/extent-tree.c | 525
-rw-r--r--  fs/btrfs/extent_io.h | 4
-rw-r--r--  fs/btrfs/file.c | 54
-rw-r--r--  fs/btrfs/free-space-cache.c | 3
-rw-r--r--  fs/btrfs/inode-map.c | 3
-rw-r--r--  fs/btrfs/inode.c | 267
-rw-r--r--  fs/btrfs/ioctl.c | 21
-rw-r--r--  fs/btrfs/ordered-data.c | 21
-rw-r--r--  fs/btrfs/qgroup.c | 525
-rw-r--r--  fs/btrfs/qgroup.h | 148
-rw-r--r--  fs/btrfs/relocation.c | 8
-rw-r--r--  fs/btrfs/tests/inode-tests.c | 17
-rw-r--r--  fs/btrfs/transaction.c | 17
-rw-r--r--  fs/btrfs/transaction.h | 14
-rw-r--r--  fs/cifs/cifsfs.c | 18
-rw-r--r--  fs/cifs/cifsglob.h | 1
-rw-r--r--  fs/cifs/cifsproto.h | 1
-rw-r--r--  fs/cifs/connect.c | 8
-rw-r--r--  fs/cifs/smb1ops.c | 1
-rw-r--r--  fs/cifs/smb2ops.c | 8
-rw-r--r--  fs/cifs/smb2pdu.c | 16
-rw-r--r--  fs/cifs/smb2pdu.h | 11
-rw-r--r--  fs/cifs/smb2transport.c | 3
-rw-r--r--  fs/cifs/transport.c | 18
-rw-r--r--  include/linux/hw_random.h | 3
-rw-r--r--  include/linux/irq.h | 1
-rw-r--r--  include/trace/events/btrfs.h | 124
-rw-r--r--  kernel/resource.c | 4
-rw-r--r--  mm/huge_memory.c | 2
-rw-r--r--  mm/memory_hotplug.c | 1
-rw-r--r--  mm/mremap.c | 12
-rw-r--r--  net/core/ethtool.c | 1
-rw-r--r--  net/core/netclassid_cgroup.c | 1
-rw-r--r--  scripts/mod/devicetable-offsets.c | 3
-rw-r--r--  scripts/mod/file2alias.c | 11
53 files changed, 1378 insertions(+), 829 deletions(-)
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 282630e4c6ea..1624a7ffa95d 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -66,7 +66,7 @@ struct kimage;
/* Memory to backup during crash kdump */
#define KEXEC_BACKUP_SRC_START (0UL)
-#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */
+#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
/*
* CPU does not save ss and sp on stack if execution is already
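The kexec hunk above is an off-by-one fix: the end of the backup region is used as an inclusive bound (as with struct resource's [start, end]), so it must be 640K - 1, not 640K. A minimal standalone check, mirroring the macro values (illustrative, not the kernel's resource-walking code):

#include <stdio.h>

#define KEXEC_BACKUP_SRC_START (0UL)
#define KEXEC_BACKUP_SRC_END   (640 * 1024UL - 1)  /* inclusive end */

int main(void)
{
	/* an inclusive [start, end] range spans end - start + 1 bytes */
	unsigned long size = KEXEC_BACKUP_SRC_END - KEXEC_BACKUP_SRC_START + 1;

	printf("backup region: %lu bytes\n", size);  /* 655360 == 640K exactly */
	return 0;
}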
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 26c376934a88..5e9c7652a1b6 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -74,6 +74,13 @@ struct irq_cfg *irq_cfg(unsigned int irq)
return irqd_cfg(irq_get_irq_data(irq));
}
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+ struct apic_chip_data *data = apic_chip_data(d);
+ return data->domain;
+}
+EXPORT_SYMBOL_GPL(irq_data_get_effective_affinity_mask);
+
static struct apic_chip_data *alloc_apic_chip_data(int node)
{
struct apic_chip_data *data;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 31e8928be82a..4c64faf4c112 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2412,7 +2412,7 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
return -ENOSYS;
if (arg != CDSL_CURRENT && arg != CDSL_NONE) {
- if ((int)arg >= cdi->capacity)
+ if (arg >= cdi->capacity)
return -EINVAL;
}
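The cdrom change removes a signedness bug: arg arrives as an unsigned long, and the (int) cast could turn a huge value into a negative number that slips past the capacity check and indexes the drive out of range. A self-contained sketch of the pitfall (toy values, not the driver's types):

#include <stdio.h>

static int check_old(unsigned long arg, int capacity)
{
	if ((int)arg >= capacity)  /* 0xFFFFFFFF becomes -1 on common ABIs and passes */
		return -1;
	return 0;                  /* "valid", but arg may be far out of range */
}

static int check_new(unsigned long arg, int capacity)
{
	if (arg >= (unsigned long)capacity)  /* unsigned compare rejects huge values */
		return -1;
	return 0;
}

int main(void)
{
	unsigned long evil = 0xFFFFFFFFUL;

	printf("old: %s\n", check_old(evil, 4) ? "rejected" : "accepted (bug)");
	printf("new: %s\n", check_new(evil, 4) ? "rejected" : "accepted");
	return 0;
}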
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index d579979e301a..863d2104c8f0 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -65,10 +65,10 @@ static unsigned short default_quality; /* = 0; default to "off" */
module_param(current_quality, ushort, 0644);
MODULE_PARM_DESC(current_quality,
- "current hwrng entropy estimation per mill");
+ "current hwrng entropy estimation per 1024 bits of input");
module_param(default_quality, ushort, 0644);
MODULE_PARM_DESC(default_quality,
- "default entropy content of hwrng per mill");
+ "default entropy content of hwrng per 1024 bits of input");
static void drop_current_rng(void);
static int hwrng_init(struct hwrng *rng);
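The reworded parameter descriptions match how the quality value is actually consumed: it estimates entropy in bits per 1024 bits of hardware input, not "per mill". A sketch of the resulting credit arithmetic (simplified, not the exact kernel routine):

#include <stdio.h>

/* entropy credited for a buffer, with quality in bits per 1024 input bits */
static unsigned long entropy_credit_bits(unsigned long bytes, unsigned short quality)
{
	return (bytes * 8UL * quality) >> 10;  /* input_bits * quality / 1024 */
}

int main(void)
{
	printf("%lu\n", entropy_credit_bits(32, 1024)); /* fully random: 256 bits */
	printf("%lu\n", entropy_credit_bits(32, 512));  /* half credit: 128 bits */
	return 0;
}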
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index bde4482e6da5..051a1c8fa27d 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -203,7 +203,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
return -ENOMEM;
}
- rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref);
+ rv = xenbus_grant_ring(dev, priv->shr, 1, &gref);
if (rv < 0)
return rv;
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 0927bba47b0c..bd50da3b9c99 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -1906,7 +1906,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
default:
dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
l2c->pdev->device);
- return IRQ_NONE;
+ goto err_free;
}
while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
@@ -1928,7 +1928,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
l2c->ring_tail++;
}
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
err_free:
kfree(other);
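The thunderx_edac fix routes the unsupported-device branch through the existing err_free label instead of returning directly, so the scratch buffer allocated earlier in the handler is no longer leaked; the success path likewise becomes ret = IRQ_HANDLED falling into the same cleanup. A reduced skeleton of that single-exit pattern (toy code, not the driver's ring logic):

#include <stdlib.h>

enum irqreturn { IRQ_NONE, IRQ_HANDLED };

static enum irqreturn threaded_isr(int device_supported)
{
	enum irqreturn ret = IRQ_NONE;
	void *other = malloc(64);        /* must be freed on every exit path */

	if (!other)
		return IRQ_NONE;

	if (!device_supported)
		goto err_free;           /* a bare return here would leak 'other' */

	/* ... drain the ring buffer ... */
	ret = IRQ_HANDLED;               /* fall through to the common cleanup */

err_free:
	free(other);
	return ret;
}

int main(void)
{
	return threaded_isr(1) == IRQ_HANDLED ? 0 : 1;
}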
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
index 01b05c305e63..fd7cdbe4330f 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
@@ -71,7 +71,6 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper,
DRM_DEBUG_DRIVER("surface width(%d), height(%d) and bpp(%d)\n",
sizes->surface_width, sizes->surface_height,
sizes->surface_bpp);
- sizes->surface_depth = 32;
bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
@@ -122,6 +121,7 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper,
hi_fbdev->fb = hibmc_framebuffer_init(priv->dev, &mode_cmd, gobj);
if (IS_ERR(hi_fbdev->fb)) {
ret = PTR_ERR(hi_fbdev->fb);
+ hi_fbdev->fb = NULL;
DRM_ERROR("failed to initialize framebuffer: %d\n", ret);
goto out_release_fbi;
}
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 52436b3c01bb..39d99d8a3270 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -679,11 +679,11 @@ int virtio_gpu_cmd_get_capset(struct virtio_gpu_device *vgdev,
{
struct virtio_gpu_get_capset *cmd_p;
struct virtio_gpu_vbuffer *vbuf;
- int max_size = vgdev->capsets[idx].max_size;
+ int max_size;
struct virtio_gpu_drv_cap_cache *cache_ent;
void *resp_buf;
- if (idx > vgdev->num_capsets)
+ if (idx >= vgdev->num_capsets)
return -EINVAL;
if (version > vgdev->capsets[idx].max_version)
@@ -693,6 +693,7 @@ int virtio_gpu_cmd_get_capset(struct virtio_gpu_device *vgdev,
if (!cache_ent)
return -ENOMEM;
+ max_size = vgdev->capsets[idx].max_size;
cache_ent->caps_cache = kmalloc(max_size, GFP_KERNEL);
if (!cache_ent->caps_cache) {
kfree(cache_ent);
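Both virtgpu hunks are one hardening change: the capset index is rejected with >= (idx == num_capsets used to pass), and capsets[idx].max_size is only read after that check. A generic sketch of validate-before-index (toy structure):

#include <stdio.h>

struct capset { int max_size; };

static int get_capset_size(const struct capset *sets, int num_sets, int idx,
			   int *out_size)
{
	if (idx < 0 || idx >= num_sets)   /* >=: idx == num_sets is out of bounds */
		return -1;                /* -EINVAL */

	*out_size = sets[idx].max_size;   /* safe: dereference only after the check */
	return 0;
}

int main(void)
{
	struct capset sets[2] = { { 64 }, { 128 } };
	int size;

	if (get_capset_size(sets, 2, 2, &size))
		printf("idx 2 rejected, as expected\n");
	return 0;
}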
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index c1447dbec820..2959330735bb 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1175,8 +1175,6 @@ static void srp_terminate_io(struct srp_rport *rport)
{
struct srp_target_port *target = rport->lld_data;
struct srp_rdma_ch *ch;
- struct Scsi_Host *shost = target->scsi_host;
- struct scsi_device *sdev;
int i, j;
for (i = 0; i < target->ch_count; i++) {
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 3c6f4727a54f..0b366cdce14f 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -926,7 +926,7 @@ static void hv_irq_unmask(struct irq_data *data)
int cpu;
u64 res;
- dest = irq_data_get_affinity_mask(data);
+ dest = irq_data_get_effective_affinity_mask(data);
pdev = msi_desc_to_pci_dev(msi_desc);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
@@ -1105,7 +1105,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
int ret;
pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
- dest = irq_data_get_affinity_mask(data);
+ dest = irq_data_get_effective_affinity_mask(data);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
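The pci-hyperv hunks switch from the configured affinity mask to the effective one, i.e. the mask the x86 vector code actually programmed; the apic/vector.c backport earlier in this merge exports exactly that lookup. Programming the hypervisor with the configured mask can name CPUs the vector was never assigned to. A toy illustration of the distinction (made-up masks, not the kernel cpumask API):

#include <stdio.h>

int main(void)
{
	unsigned long configured = 0x0f; /* user asked for CPUs 0-3 */
	unsigned long effective  = 0x02; /* vector code actually picked CPU 1 */

	/* the interrupt can only fire where the effective mask says it will */
	for (int cpu = 0; cpu < 4; cpu++)
		if (effective & (1UL << cpu))
			printf("retarget interrupt to CPU %d\n", cpu);
	(void)configured;
	return 0;
}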
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 1e77851fa685..ad175a550299 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -2,6 +2,15 @@
# fbdev configuration
#
+config FB_CMDLINE
+ bool
+
+config FB_CLPS711X_OLD
+ tristate
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+
menuconfig FB
tristate "Support for frame buffer devices"
select FB_CMDLINE
@@ -53,9 +62,6 @@ config FIRMWARE_EDID
combination with certain motherboards and monitors are known to
suffer from this problem.
-config FB_CMDLINE
- bool
-
config FB_DDC
tristate
depends on FB
@@ -313,12 +319,6 @@ config FB_ACORN
hardware found in Acorn RISC PCs and other ARM-based machines. If
unsure, say N.
-config FB_CLPS711X_OLD
- tristate
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
-
config FB_CLPS711X
tristate "CLPS711X LCD support"
depends on FB && (ARCH_CLPS711X || COMPILE_TEST)
@@ -1550,7 +1550,6 @@ if FB_VIA
config FB_VIA_DIRECT_PROCFS
bool "direct hardware access via procfs (DEPRECATED)(DANGEROUS)"
- depends on FB_VIA
default n
help
Allow direct hardware access to some output registers via procfs.
@@ -1560,7 +1559,6 @@ config FB_VIA_DIRECT_PROCFS
config FB_VIA_X_COMPATIBILITY
bool "X server compatibility"
- depends on FB_VIA
default n
help
This option reduces the functionality (power saving, ...) of the
@@ -2446,10 +2444,7 @@ config FB_SIMPLE
Configuration re: surface address, size, and format must be provided
through device tree, or plain old platform data.
-source "drivers/video/fbdev/omap/Kconfig"
-source "drivers/video/fbdev/omap2/Kconfig"
source "drivers/video/fbdev/exynos/Kconfig"
-source "drivers/video/fbdev/mmp/Kconfig"
config FB_SH_MOBILE_MERAM
tristate "SuperH Mobile MERAM read ahead support"
@@ -2492,3 +2487,7 @@ config FB_SM712
This driver is also available as a module. The module will be
called sm712fb. If you want to compile it as a module, say M
here and read <file:Documentation/kbuild/modules.txt>.
+
+source "drivers/video/fbdev/omap/Kconfig"
+source "drivers/video/fbdev/omap2/Kconfig"
+source "drivers/video/fbdev/mmp/Kconfig"
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 81a5ba433c6d..09605151ad5c 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1157,7 +1157,7 @@ again:
*/
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f33ca2c466ec..624e2ccc5f96 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -36,14 +36,13 @@
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
-#define BTRFS_INODE_DELALLOC_META_RESERVED 4
-#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
-#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
-#define BTRFS_INODE_NEEDS_FULL_SYNC 7
-#define BTRFS_INODE_COPY_EVERYTHING 8
-#define BTRFS_INODE_IN_DELALLOC_LIST 9
-#define BTRFS_INODE_READDIO_NEED_LOCK 10
-#define BTRFS_INODE_HAS_PROPS 11
+#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
+#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
+#define BTRFS_INODE_NEEDS_FULL_SYNC 6
+#define BTRFS_INODE_COPY_EVERYTHING 7
+#define BTRFS_INODE_IN_DELALLOC_LIST 8
+#define BTRFS_INODE_READDIO_NEED_LOCK 9
+#define BTRFS_INODE_HAS_PROPS 10
/* in memory btrfs inode */
struct btrfs_inode {
@@ -169,7 +168,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
unsigned outstanding_extents;
- unsigned reserved_extents;
+
+ struct btrfs_block_rsv block_rsv;
/*
* always compress this one file
@@ -255,6 +255,15 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
return false;
}
+static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
+ int mod)
+{
+ lockdep_assert_held(&inode->lock);
+ inode->outstanding_extents += mod;
+ if (btrfs_is_free_space_inode(&inode->vfs_inode))
+ return;
+}
+
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
int ret = 0;
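btrfs_mod_outstanding_extents() above centralizes the counter update and encodes its locking contract in lockdep_assert_held(): callers must hold inode->lock. A userspace stand-in for that contract (pthread mutex and a plain assert in place of lockdep):

#include <assert.h>
#include <pthread.h>

struct toy_inode {
	pthread_mutex_t lock;
	unsigned outstanding_extents;
	int lock_held;                        /* stand-in for lockdep's bookkeeping */
};

static void mod_outstanding_extents(struct toy_inode *inode, int mod)
{
	assert(inode->lock_held);             /* lockdep_assert_held(&inode->lock) */
	inode->outstanding_extents += mod;
}

int main(void)
{
	struct toy_inode inode = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	pthread_mutex_lock(&inode.lock);
	inode.lock_held = 1;
	mod_outstanding_extents(&inode, 2);   /* reserve two extents */
	mod_outstanding_extents(&inode, -2);  /* ... and drop them again */
	inode.lock_held = 0;
	pthread_mutex_unlock(&inode.lock);
	return inode.outstanding_extents;     /* 0 */
}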
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 297f87d7d10c..90e74be0ec41 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -100,6 +100,14 @@ static int btrfs_csum_sizes[] = { 4 };
#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+/*
+ * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+ */
+static inline u32 count_max_extents(u64 size)
+{
+ return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+}
+
struct btrfs_mapping_tree {
struct extent_map_tree map_tree;
};
@@ -460,6 +468,25 @@ struct btrfs_block_rsv {
unsigned short full;
unsigned short type;
unsigned short failfast;
+
+ /*
+ * Qgroup equivalent for @size @reserved
+ *
+ * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
+ * about things like csum size nor how many tree blocks it will need to
+ * reserve.
+ *
+ * Qgroup cares more about net change of the extent usage.
+ *
+ * So even though a newly inserted file extent may, in the worst case,
+ * cause a leaf split and a level increase, one nodesize per file
+ * extent is already more than enough.
+ *
+ * In short, qgroup_size/reserved is the upper limit of possible needed
+ * qgroup metadata reservation.
+ */
+ u64 qgroup_rsv_size;
+ u64 qgroup_rsv_reserved;
};
/*
@@ -673,6 +700,12 @@ struct btrfs_delayed_root;
#define BTRFS_FS_LOG1_ERR 12
#define BTRFS_FS_LOG2_ERR 13
+/*
+ * To inform transaction_kthread that we need an immediate commit, so
+ * it doesn't need to wait for commit_interval
+ */
+#define BTRFS_FS_NEED_ASYNC_COMMIT 17
+
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -712,8 +745,6 @@ struct btrfs_fs_info {
* delayed dir index item
*/
struct btrfs_block_rsv global_block_rsv;
- /* block reservation for delay allocation */
- struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
@@ -1223,8 +1254,10 @@ struct btrfs_root {
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshoted;
- /* For qgroup metadata space reserve */
- atomic64_t qgroup_meta_rsv;
+ /* For qgroup metadata reserved space */
+ spinlock_t qgroup_meta_rsv_lock;
+ u64 qgroup_meta_rsv_pertrans;
+ u64 qgroup_meta_rsv_prealloc;
};
static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 blocksize)
@@ -2568,8 +2601,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct btrfs_root *root,
struct extent_buffer *eb);
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_cross_ref_exist(struct btrfs_root *root,
u64 objectid, u64 offset, u64 bytenr);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
@@ -2678,7 +2710,8 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
void btrfs_delalloc_release_space(struct inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len);
+ struct extent_changeset *reserved,
+ u64 start, u64 len, bool qgroup_free);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -2694,13 +2727,19 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
u64 qgroup_reserved);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes,
+ bool qgroup_free);
int btrfs_delalloc_reserve_space(struct inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
unsigned short type);
+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv,
+ unsigned short type);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
@@ -3605,6 +3644,16 @@ static inline int btrfs_test_is_dummy_root(struct btrfs_root *root)
return 0;
}
+static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
+{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO,
+ &fs_info->fs_state)))
+ return 1;
+#endif
+ return 0;
+}
+
/*
* Module parameter
*/
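count_max_extents() introduced in this header is a round-up division: how many BTRFS_MAX_EXTENT_SIZE (128MiB) chunks are needed to cover @size. A standalone check of the boundary cases (plain division standing in for the kernel's div_u64):

#include <stdio.h>
#include <stdint.h>

#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024ULL)

static uint32_t count_max_extents(uint64_t size)
{
	return (size + BTRFS_MAX_EXTENT_SIZE - 1) / BTRFS_MAX_EXTENT_SIZE;
}

int main(void)
{
	printf("%u\n", count_max_extents(1));                         /* 1 */
	printf("%u\n", count_max_extents(BTRFS_MAX_EXTENT_SIZE));     /* 1 */
	printf("%u\n", count_max_extents(BTRFS_MAX_EXTENT_SIZE + 1)); /* 2 */
	return 0;
}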
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b4943d3a8a5d..027e55d5072f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -22,6 +22,7 @@
#include "disk-io.h"
#include "transaction.h"
#include "ctree.h"
+#include "qgroup.h"
#define BTRFS_DELAYED_WRITEBACK 512
#define BTRFS_DELAYED_BACKGROUND 128
@@ -559,6 +560,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
dst_rsv = &root->fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+ /*
+ * Here we migrate space rsv from the transaction rsv, since we have
+ * already reserved space when starting a transaction. So there is no
+ * need to reserve qgroup space here.
+ */
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_item",
@@ -579,6 +586,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
return;
rsv = &root->fs_info->delayed_block_rsv;
+ /*
+ * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
+ * to release/reserve qgroup space.
+ */
trace_btrfs_space_reservation(root->fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
@@ -596,7 +607,6 @@ static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_block_rsv *dst_rsv;
u64 num_bytes;
int ret;
- bool release = false;
src_rsv = trans->block_rsv;
dst_rsv = &root->fs_info->delayed_block_rsv;
@@ -604,39 +614,20 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
/*
- * If our block_rsv is the delalloc block reserve then check and see if
- * we have our extra reservation for updating the inode. If not fall
- * through and try to reserve space quickly.
- *
- * We used to try and steal from the delalloc block rsv or the global
- * reserve, but we'd steal a full reservation, which isn't kind. We are
- * here through delalloc which means we've likely just cowed down close
- * to the leaf that contains the inode, so we would steal less just
- * doing the fallback inode update, so if we do end up having to steal
- * from the global block rsv we hopefully only steal one or two blocks
- * worth which is less likely to hurt us.
- */
- if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
- spin_lock(&BTRFS_I(inode)->lock);
- if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags))
- release = true;
- else
- src_rsv = NULL;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
-
- /*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
* still need to reserve space for this update, so try to reserve the
* space.
*
* Now if src_rsv == delalloc_block_rsv we'll let it just steal since
- * we're accounted for.
+ * we always reserve enough to update the inode item.
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, root->nodesize,
+ true);
+ if (ret < 0)
+ return ret;
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
/*
@@ -645,50 +636,34 @@ static int btrfs_delayed_inode_reserve_metadata(
* EAGAIN to make us stop the transaction we have, so return
* ENOSPC instead so that btrfs_dirty_inode knows what to do.
*/
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
ret = -ENOSPC;
+ btrfs_qgroup_free_meta_prealloc(root, num_bytes);
+ }
if (!ret) {
node->bytes_reserved = num_bytes;
trace_btrfs_space_reservation(root->fs_info,
"delayed_inode",
btrfs_ino(inode),
num_bytes, 1);
+ } else {
+ btrfs_qgroup_free_meta_prealloc(root, root->nodesize);
}
return ret;
}
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
-
- /*
- * Migrate only takes a reservation, it doesn't touch the size of the
- * block_rsv. This is to simplify people who don't normally have things
- * migrated from their block rsv. If they go to release their
- * reservation, that will decrease the size as well, so if migrate
- * reduced size we'd end up with a negative size. But for the
- * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
- * but we could in fact do this reserve/migrate dance several times
- * between the time we did the original reservation and we'd clean it
- * up. So to take care of this, release the space for the meta
- * reservation here. I think it may be time for a documentation page on
- * how block rsvs. work.
- */
if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
btrfs_ino(inode), num_bytes, 1);
node->bytes_reserved = num_bytes;
}
-
- if (release) {
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), num_bytes, 0);
- btrfs_block_rsv_release(root, src_rsv, num_bytes);
- }
-
return ret;
}
static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
- struct btrfs_delayed_node *node)
+ struct btrfs_delayed_node *node,
+ bool qgroup_free)
{
struct btrfs_block_rsv *rsv;
@@ -700,6 +675,12 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
node->inode_id, node->bytes_reserved, 0);
btrfs_block_rsv_release(root, rsv,
node->bytes_reserved);
+ if (qgroup_free)
+ btrfs_qgroup_free_meta_prealloc(node->root,
+ node->bytes_reserved);
+ else
+ btrfs_qgroup_convert_reserved_meta(node->root,
+ node->bytes_reserved);
node->bytes_reserved = 0;
}
@@ -1082,7 +1063,7 @@ out:
no_iref:
btrfs_release_path(path);
err_out:
- btrfs_delayed_inode_release_metadata(root, node);
+ btrfs_delayed_inode_release_metadata(root, node, (ret < 0));
btrfs_release_delayed_inode(node);
return ret;
@@ -1922,7 +1903,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
btrfs_release_delayed_iref(delayed_node);
if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
- btrfs_delayed_inode_release_metadata(root, delayed_node);
+ btrfs_delayed_inode_release_metadata(root, delayed_node, false);
btrfs_release_delayed_inode(delayed_node);
}
mutex_unlock(&delayed_node->mutex);
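The delayed-inode changes pair every btrfs_qgroup_reserve_meta_prealloc() with a matching free on each failure path, and at release time either free the qgroup bytes (error case) or convert them into a per-trans reservation. A skeleton of that pairing discipline with toy counters (not the btrfs API):

#include <assert.h>

static long qgroup_reserved;

static int  qgroup_reserve(long bytes)  { qgroup_reserved += bytes; return 0; }
static void qgroup_free(long bytes)     { qgroup_reserved -= bytes; }
static void qgroup_convert(long bytes)  { qgroup_reserved -= bytes; /* now per-trans */ }

static int reserve_metadata(int block_rsv_ok)
{
	if (qgroup_reserve(16384))
		return -1;
	if (!block_rsv_ok) {            /* btrfs_block_rsv_add() failed */
		qgroup_free(16384);     /* undo the qgroup half as well */
		return -1;
	}
	return 0;
}

int main(void)
{
	reserve_metadata(0);            /* failure path must leave no residue */
	assert(qgroup_reserved == 0);

	if (!reserve_metadata(1))       /* success path ... */
		qgroup_convert(16384);  /* ... is converted at release, not leaked */
	assert(qgroup_reserved == 0);
	return 0;
}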
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index bb041e797bff..9a76cbd04558 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -40,10 +40,10 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
* compare two delayed tree backrefs with same bytenr and type
*/
-static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
- struct btrfs_delayed_tree_ref *ref1, int type)
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
+ struct btrfs_delayed_tree_ref *ref2)
{
- if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+ if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
if (ref1->root < ref2->root)
return -1;
if (ref1->root > ref2->root)
@@ -60,8 +60,8 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
/*
* compare two delayed data backrefs with same bytenr and type
*/
-static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
- struct btrfs_delayed_data_ref *ref1)
+static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
+ struct btrfs_delayed_data_ref *ref2)
{
if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
if (ref1->root < ref2->root)
@@ -85,6 +85,34 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
return 0;
}
+static int comp_refs(struct btrfs_delayed_ref_node *ref1,
+ struct btrfs_delayed_ref_node *ref2,
+ bool check_seq)
+{
+ int ret = 0;
+
+ if (ref1->type < ref2->type)
+ return -1;
+ if (ref1->type > ref2->type)
+ return 1;
+ if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
+ ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
+ btrfs_delayed_node_to_tree_ref(ref2));
+ else
+ ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
+ btrfs_delayed_node_to_data_ref(ref2));
+ if (ret)
+ return ret;
+ if (check_seq) {
+ if (ref1->seq < ref2->seq)
+ return -1;
+ if (ref1->seq > ref2->seq)
+ return 1;
+ }
+ return 0;
+}
+
/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
struct rb_node *node)
@@ -222,19 +250,7 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
if (seq && next->seq >= seq)
goto next;
- if (next->type != ref->type)
- goto next;
-
- if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
- ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
- comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
- btrfs_delayed_node_to_tree_ref(next),
- ref->type))
- goto next;
- if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
- ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
- comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
- btrfs_delayed_node_to_data_ref(next)))
+ if (comp_refs(ref, next, false))
goto next;
if (ref->action == next->action) {
@@ -408,19 +424,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
list);
/* No need to compare bytenr nor is_head */
- if (exist->type != ref->type || exist->seq != ref->seq)
- goto add_tail;
-
- if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
- exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
- comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
- btrfs_delayed_node_to_tree_ref(ref),
- ref->type))
- goto add_tail;
- if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
- exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
- comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
- btrfs_delayed_node_to_data_ref(ref)))
+ if (comp_refs(exist, ref, true))
goto add_tail;
/* Now we are sure we can merge */
@@ -959,11 +963,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
* the head node if any where found, or NULL if not.
*/
struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
- struct btrfs_delayed_ref_root *delayed_refs;
-
- delayed_refs = &trans->transaction->delayed_refs;
return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}
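comp_refs() folds the type check, the two type-specific comparators, and the optional sequence check into one total-order comparison, which is why both merge_ref() and the tail-merge path above collapse to a single call. The same factoring on a toy ref type (not the btrfs structures):

#include <stdio.h>
#include <stdbool.h>

struct toy_ref { int type; int root; unsigned long seq; };

static int cmp_val(unsigned long a, unsigned long b)
{
	return (a > b) - (a < b);
}

/* one comparator, usable for merging (check_seq=false) and insertion (true) */
static int comp_refs(const struct toy_ref *a, const struct toy_ref *b,
		     bool check_seq)
{
	int ret;

	if ((ret = cmp_val(a->type, b->type)))
		return ret;
	if ((ret = cmp_val(a->root, b->root)))  /* type-specific payload */
		return ret;
	return check_seq ? cmp_val(a->seq, b->seq) : 0;
}

int main(void)
{
	struct toy_ref x = { 1, 5, 10 }, y = { 1, 5, 11 };

	printf("mergeable: %s\n", comp_refs(&x, &y, false) == 0 ? "yes" : "no");
	printf("identical: %s\n", comp_refs(&x, &y, true)  == 0 ? "yes" : "no");
	return 0;
}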
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 7d470ea23a23..f91f9102d84a 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -274,7 +274,8 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+ u64 bytenr);
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 60ae21b58931..cc97abf6bde0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1265,6 +1265,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
spin_lock_init(&root->accounting_lock);
spin_lock_init(&root->log_extents_lock[0]);
spin_lock_init(&root->log_extents_lock[1]);
+ spin_lock_init(&root->qgroup_meta_rsv_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
mutex_init(&root->ordered_extent_mutex);
@@ -1281,7 +1282,6 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
atomic_set(&root->orphan_inodes, 0);
atomic_set(&root->refs, 1);
atomic_set(&root->will_be_snapshoted, 0);
- atomic64_set(&root->qgroup_meta_rsv, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
@@ -1898,6 +1898,8 @@ static int transaction_kthread(void *arg)
now = get_seconds();
if (cur->state < TRANS_STATE_BLOCKED &&
+ !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT,
+ &root->fs_info->flags) &&
(now < cur->start_time ||
now - cur->start_time < root->fs_info->commit_interval)) {
spin_unlock(&root->fs_info->trans_lock);
@@ -2556,14 +2558,6 @@ int open_ctree(struct super_block *sb,
goto fail_delalloc_bytes;
}
- fs_info->btree_inode = new_inode(sb);
- if (!fs_info->btree_inode) {
- err = -ENOMEM;
- goto fail_bio_counter;
- }
-
- mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
-
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
@@ -2597,8 +2591,6 @@ int open_ctree(struct super_block *sb,
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv,
BTRFS_BLOCK_RSV_GLOBAL);
- btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
- BTRFS_BLOCK_RSV_DELALLOC);
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
@@ -2630,6 +2622,14 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->ordered_roots);
spin_lock_init(&fs_info->ordered_root_lock);
+
+ fs_info->btree_inode = new_inode(sb);
+ if (!fs_info->btree_inode) {
+ err = -ENOMEM;
+ goto fail_bio_counter;
+ }
+ mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
+
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
GFP_KERNEL);
if (!fs_info->delayed_root) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 895fffea0db6..92be5cb3d177 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
+#include <linux/lockdep.h>
#include "hash.h"
#include "tree-log.h"
#include "disk-io.h"
@@ -909,7 +910,7 @@ search_again:
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
@@ -3074,8 +3075,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
return ret;
}
-static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static noinline int check_delayed_ref(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr)
{
@@ -3083,13 +3083,23 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_data_ref *data_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_transaction *cur_trans;
int ret = 0;
- delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&root->fs_info->trans_lock);
+ cur_trans = root->fs_info->running_transaction;
+ if (cur_trans)
+ atomic_inc(&cur_trans->use_count);
+ spin_unlock(&root->fs_info->trans_lock);
+ if (!cur_trans)
+ return 0;
+
+ delayed_refs = &cur_trans->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head) {
spin_unlock(&delayed_refs->lock);
+ btrfs_put_transaction(cur_trans);
return 0;
}
@@ -3106,6 +3116,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
+ btrfs_put_transaction(cur_trans);
return -EAGAIN;
}
spin_unlock(&delayed_refs->lock);
@@ -3133,11 +3144,11 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
}
spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
+ btrfs_put_transaction(cur_trans);
return ret;
}
-static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static noinline int check_committed_ref(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr)
{
@@ -3207,9 +3218,8 @@ out:
return ret;
}
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 offset, u64 bytenr)
+int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
+ u64 bytenr)
{
struct btrfs_path *path;
int ret;
@@ -3220,12 +3230,12 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
return -ENOENT;
do {
- ret = check_committed_ref(trans, root, path, objectid,
+ ret = check_committed_ref(root, path, objectid,
offset, bytenr);
if (ret && ret != -ENOENT)
goto out;
- ret2 = check_delayed_ref(trans, root, path, objectid,
+ ret2 = check_delayed_ref(root, path, objectid,
offset, bytenr);
} while (ret2 == -EAGAIN);
@@ -4747,7 +4757,6 @@ static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
bool wait_ordered)
{
- struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
@@ -4763,8 +4772,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
trans = (struct btrfs_trans_handle *)current->journal_info;
- block_rsv = &root->fs_info->delalloc_block_rsv;
- space_info = block_rsv->space_info;
+ space_info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_METADATA);
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
@@ -5509,15 +5517,20 @@ again:
}
}
-static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
+static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
- struct btrfs_block_rsv *dest, u64 num_bytes)
+ struct btrfs_block_rsv *dest, u64 num_bytes,
+ u64 *qgroup_to_release_ret)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
+ u64 qgroup_to_release = 0;
+ u64 ret;
spin_lock(&block_rsv->lock);
- if (num_bytes == (u64)-1)
+ if (num_bytes == (u64)-1) {
num_bytes = block_rsv->size;
+ qgroup_to_release = block_rsv->qgroup_rsv_size;
+ }
block_rsv->size -= num_bytes;
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
@@ -5526,8 +5539,16 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} else {
num_bytes = 0;
}
+ if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+ block_rsv->qgroup_rsv_size;
+ block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+ } else {
+ qgroup_to_release = 0;
+ }
spin_unlock(&block_rsv->lock);
+ ret = num_bytes;
if (num_bytes > 0) {
if (dest) {
spin_lock(&dest->lock);
@@ -5547,6 +5568,9 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
space_info_add_old_bytes(fs_info, space_info,
num_bytes);
}
+ if (qgroup_to_release_ret)
+ *qgroup_to_release_ret = qgroup_to_release;
+ return ret;
}
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
@@ -5570,6 +5594,15 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
rsv->type = type;
}
+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv,
+ unsigned short type)
+{
+ btrfs_init_block_rsv(rsv, type);
+ rsv->space_info = __find_space_info(fs_info,
+ BTRFS_BLOCK_GROUP_METADATA);
+}
+
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
unsigned short type)
{
@@ -5580,9 +5613,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
if (!block_rsv)
return NULL;
- btrfs_init_block_rsv(block_rsv, type);
- block_rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
+ btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
return block_rsv;
}
@@ -5666,6 +5697,92 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
+/**
+ * btrfs_inode_rsv_refill - refill the inode block rsv.
+ * @inode - the inode we are refilling.
+ * @flush - the flushing restriction.
+ *
+ * Essentially the same as btrfs_block_rsv_refill, except it uses the
+ * block_rsv->size as the minimum size. We'll either refill the missing amount
+ * or return if we already have enough space. This will also handle the reserve
+ * tracepoint for the reserved amount.
+ */
+int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
+ enum btrfs_reserve_flush_enum flush)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 num_bytes = 0;
+ u64 qgroup_num_bytes = 0;
+ int ret = -ENOSPC;
+
+ spin_lock(&block_rsv->lock);
+ if (block_rsv->reserved < block_rsv->size)
+ num_bytes = block_rsv->size - block_rsv->reserved;
+ if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
+ qgroup_num_bytes = block_rsv->qgroup_rsv_size -
+ block_rsv->qgroup_rsv_reserved;
+ spin_unlock(&block_rsv->lock);
+
+ if (num_bytes == 0)
+ return 0;
+
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
+ if (ret)
+ return ret;
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
+ if (!ret) {
+ block_rsv_add_bytes(block_rsv, num_bytes, 0);
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
+ btrfs_ino(&inode->vfs_inode),
+ num_bytes, 1);
+
+ /* Don't forget to increase qgroup_rsv_reserved */
+ spin_lock(&block_rsv->lock);
+ block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
+ spin_unlock(&block_rsv->lock);
+ } else
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
+ return ret;
+}
+
+/**
+ * btrfs_inode_rsv_release - release any excessive reservation.
+ * @inode - the inode we need to release from.
+ * @qgroup_free - free or convert qgroup meta.
+ * Unlike normal operation, qgroup meta reservation needs to know if we are
+ * freeing qgroup reservation or just converting it into per-trans. Normally
+ * @qgroup_free is true for error handling, and false for normal release.
+ *
+ * This is the same as btrfs_block_rsv_release, except that it handles the
+ * tracepoint for the reservation.
+ */
+void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 released = 0;
+ u64 qgroup_to_release = 0;
+
+ /*
+ * Since we statically set the block_rsv->size we just want to say we
+ * are releasing 0 bytes, and then we'll just get the reservation over
+ * the size free'd.
+ */
+ released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
+ &qgroup_to_release);
+ if (released > 0)
+ trace_btrfs_space_reservation(fs_info, "delalloc",
+ btrfs_ino(&inode->vfs_inode),
+ released, 0);
+ if (qgroup_free)
+ btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
+ else
+ btrfs_qgroup_convert_reserved_meta(inode->root,
+ qgroup_to_release);
+}
+
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
@@ -5675,7 +5792,7 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
block_rsv->space_info != global_rsv->space_info)
global_rsv = NULL;
block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
- num_bytes);
+ num_bytes, NULL);
}
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5739,7 +5856,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
fs_info->global_block_rsv.space_info = space_info;
- fs_info->delalloc_block_rsv.space_info = space_info;
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
fs_info->delayed_block_rsv.space_info = space_info;
@@ -5758,9 +5874,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
- (u64)-1);
- WARN_ON(fs_info->delalloc_block_rsv.size > 0);
- WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
+ (u64)-1, NULL);
WARN_ON(fs_info->trans_block_rsv.size > 0);
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
@@ -5798,7 +5912,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
WARN_ON_ONCE(!list_empty(&trans->new_bgs));
block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
- trans->chunk_bytes_reserved);
+ trans->chunk_bytes_reserved, NULL);
trans->chunk_bytes_reserved = 0;
}
@@ -5863,7 +5977,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) {
/* One for parent inode, two for dir entries */
num_bytes = 3 * root->nodesize;
- ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
if (ret)
return ret;
} else {
@@ -5882,7 +5996,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
if (ret && *qgroup_reserved)
- btrfs_qgroup_free_meta(root, *qgroup_reserved);
+ btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
return ret;
}
@@ -5894,107 +6008,45 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
btrfs_block_rsv_release(root, rsv, (u64)-1);
}
-/**
- * drop_outstanding_extent - drop an outstanding extent
- * @inode: the inode we're dropping the extent for
- * @num_bytes: the number of bytes we're relaseing.
- *
- * This is called when we are freeing up an outstanding extent, either called
- * after an error or after an extent is written. This will return the number of
- * reserved extents that need to be freed. This must be called with
- * BTRFS_I(inode)->lock held.
- */
-static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
-{
- unsigned drop_inode_space = 0;
- unsigned dropped_extents = 0;
- unsigned num_extents = 0;
-
- num_extents = (unsigned)div64_u64(num_bytes +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
- ASSERT(num_extents);
- ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
- BTRFS_I(inode)->outstanding_extents -= num_extents;
-
- if (BTRFS_I(inode)->outstanding_extents == 0 &&
- test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags))
- drop_inode_space = 1;
-
+static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode)
+{
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 reserve_size = 0;
+ u64 qgroup_rsv_size = 0;
+ u64 csum_leaves;
+ unsigned outstanding_extents;
+
+ lockdep_assert_held(&inode->lock);
+ outstanding_extents = inode->outstanding_extents;
+ if (outstanding_extents)
+ reserve_size = btrfs_calc_trans_metadata_size(inode->root,
+ outstanding_extents + 1);
+ csum_leaves = btrfs_csum_bytes_to_leaves(inode->root,
+ inode->csum_bytes);
+ reserve_size += btrfs_calc_trans_metadata_size(inode->root,
+ csum_leaves);
/*
- * If we have more or the same amount of outsanding extents than we have
- * reserved then we need to leave the reserved extents count alone.
+ * For qgroup rsv, the calculation is very simple:
+ * account one nodesize for each outstanding extent
+ *
+ * This is overestimating in most cases.
*/
- if (BTRFS_I(inode)->outstanding_extents >=
- BTRFS_I(inode)->reserved_extents)
- return drop_inode_space;
-
- dropped_extents = BTRFS_I(inode)->reserved_extents -
- BTRFS_I(inode)->outstanding_extents;
- BTRFS_I(inode)->reserved_extents -= dropped_extents;
- return dropped_extents + drop_inode_space;
-}
-
-/**
- * calc_csum_metadata_size - return the amount of metada space that must be
- * reserved/free'd for the given bytes.
- * @inode: the inode we're manipulating
- * @num_bytes: the number of bytes in question
- * @reserve: 1 if we are reserving space, 0 if we are freeing space
- *
- * This adjusts the number of csum_bytes in the inode and then returns the
- * correct amount of metadata that must either be reserved or freed. We
- * calculate how many checksums we can fit into one leaf and then divide the
- * number of bytes that will need to be checksumed by this value to figure out
- * how many checksums will be required. If we are adding bytes then the number
- * may go up and we will return the number of additional bytes that must be
- * reserved. If it is going down we will return the number of bytes that must
- * be freed.
- *
- * This must be called with BTRFS_I(inode)->lock held.
- */
-static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
- int reserve)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 old_csums, num_csums;
-
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
- BTRFS_I(inode)->csum_bytes == 0)
- return 0;
-
- old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
- if (reserve)
- BTRFS_I(inode)->csum_bytes += num_bytes;
- else
- BTRFS_I(inode)->csum_bytes -= num_bytes;
- num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
+ qgroup_rsv_size = outstanding_extents * fs_info->tree_root->nodesize;
- /* No change, no need to reserve more */
- if (old_csums == num_csums)
- return 0;
-
- if (reserve)
- return btrfs_calc_trans_metadata_size(root,
- num_csums - old_csums);
-
- return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
+ spin_lock(&block_rsv->lock);
+ block_rsv->size = reserve_size;
+ block_rsv->qgroup_rsv_size = qgroup_rsv_size;
+ spin_unlock(&block_rsv->lock);
}
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
- u64 to_reserve = 0;
- u64 csum_bytes;
- unsigned nr_extents = 0;
+ unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
bool delalloc_lock = true;
- u64 to_free = 0;
- unsigned dropped;
- bool release_extra = false;
/* If we are a free space inode we need to not flush since we will be in
* the middle of a transaction commit. We also don't need the delalloc
@@ -6020,117 +6072,32 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, root->sectorsize);
+ /* Add our new extents and calculate the new rsv size. */
spin_lock(&BTRFS_I(inode)->lock);
- nr_extents = (unsigned)div64_u64(num_bytes +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
- BTRFS_I(inode)->outstanding_extents += nr_extents;
-
- nr_extents = 0;
- if (BTRFS_I(inode)->outstanding_extents >
- BTRFS_I(inode)->reserved_extents)
- nr_extents += BTRFS_I(inode)->outstanding_extents -
- BTRFS_I(inode)->reserved_extents;
-
- /* We always want to reserve a slot for updating the inode. */
- to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
- to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
- csum_bytes = BTRFS_I(inode)->csum_bytes;
- spin_unlock(&BTRFS_I(inode)->lock);
+ nr_extents = count_max_extents(num_bytes);
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), nr_extents);
- if (test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) {
- ret = btrfs_qgroup_reserve_meta(root,
- nr_extents * root->nodesize, true);
- if (ret)
- goto out_fail;
- }
+ BTRFS_I(inode)->csum_bytes += num_bytes;
+ btrfs_calculate_inode_block_rsv_size(root->fs_info, BTRFS_I(inode));
+ spin_unlock(&BTRFS_I(inode)->lock);
- ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
- if (unlikely(ret)) {
- btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
+ ret = btrfs_inode_rsv_refill(BTRFS_I(inode), flush);
+ if (unlikely(ret))
goto out_fail;
- }
-
- spin_lock(&BTRFS_I(inode)->lock);
- if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags)) {
- to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
- release_extra = true;
- }
- BTRFS_I(inode)->reserved_extents += nr_extents;
- spin_unlock(&BTRFS_I(inode)->lock);
if (delalloc_lock)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
-
- if (to_reserve)
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), to_reserve, 1);
- if (release_extra)
- btrfs_block_rsv_release(root, block_rsv,
- btrfs_calc_trans_metadata_size(root,
- 1));
return 0;
out_fail:
spin_lock(&BTRFS_I(inode)->lock);
- dropped = drop_outstanding_extent(inode, num_bytes);
- /*
- * If the inodes csum_bytes is the same as the original
- * csum_bytes then we know we haven't raced with any free()ers
- * so we can just reduce our inodes csum bytes and carry on.
- */
- if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
- calc_csum_metadata_size(inode, num_bytes, 0);
- } else {
- u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
- u64 bytes;
+ nr_extents = count_max_extents(num_bytes);
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), -nr_extents);
- /*
- * This is tricky, but first we need to figure out how much we
- * free'd from any free-ers that occured during this
- * reservation, so we reset ->csum_bytes to the csum_bytes
- * before we dropped our lock, and then call the free for the
- * number of bytes that were freed while we were trying our
- * reservation.
- */
- bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
- BTRFS_I(inode)->csum_bytes = csum_bytes;
- to_free = calc_csum_metadata_size(inode, bytes, 0);
-
-
- /*
- * Now we need to see how much we would have freed had we not
- * been making this reservation and our ->csum_bytes were not
- * artificially inflated.
- */
- BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
- bytes = csum_bytes - orig_csum_bytes;
- bytes = calc_csum_metadata_size(inode, bytes, 0);
-
- /*
- * Now reset ->csum_bytes to what it should be. If bytes is
- * more than to_free then we would have free'd more space had we
- * not had an artificially high ->csum_bytes, so we need to free
- * the remainder. If bytes is the same or less then we don't
- * need to do anything, the other free-ers did the correct
- * thing.
- */
- BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
- if (bytes > to_free)
- to_free = bytes - to_free;
- else
- to_free = 0;
- }
+ BTRFS_I(inode)->csum_bytes -= num_bytes;
+ btrfs_calculate_inode_block_rsv_size(root->fs_info, BTRFS_I(inode));
spin_unlock(&BTRFS_I(inode)->lock);
- if (dropped)
- to_free += btrfs_calc_trans_metadata_size(root, dropped);
-
- if (to_free) {
- btrfs_block_rsv_release(root, block_rsv, to_free);
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), to_free, 0);
- }
+ btrfs_inode_rsv_release(BTRFS_I(inode), true);
if (delalloc_lock)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
return ret;
@@ -6138,37 +6105,57 @@ out_fail:
/**
* btrfs_delalloc_release_metadata - release a metadata reservation for an inode
- * @inode: the inode to release the reservation for
- * @num_bytes: the number of bytes we're releasing
+ * @inode: the inode to release the reservation for.
+ * @num_bytes: the number of bytes we are releasing.
+ * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
*
* This will release the metadata reservation for an inode. This can be called
* once we complete IO for a given set of bytes to release their metadata
- * reservations.
+ * reservations, or on error for the same reason.
*/
-void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes,
+ bool qgroup_free)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 to_free = 0;
- unsigned dropped;
num_bytes = ALIGN(num_bytes, root->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
- dropped = drop_outstanding_extent(inode, num_bytes);
-
- if (num_bytes)
- to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+ BTRFS_I(inode)->csum_bytes -= num_bytes;
+ btrfs_calculate_inode_block_rsv_size(root->fs_info, BTRFS_I(inode));
spin_unlock(&BTRFS_I(inode)->lock);
- if (dropped > 0)
- to_free += btrfs_calc_trans_metadata_size(root, dropped);
if (btrfs_test_is_dummy_root(root))
return;
+ btrfs_inode_rsv_release(BTRFS_I(inode), qgroup_free);
+}
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), to_free, 0);
+/**
+ * btrfs_delalloc_release_extents - release our outstanding_extents
+ * @inode: the inode to balance the reservation for.
+ * @num_bytes: the number of bytes we originally reserved with
+ * @qgroup_free: do we need to free qgroup meta reservation or convert them.
+ *
+ * When we reserve space we increase outstanding_extents for the extents we may
+ * add. Once we've set the range as delalloc or created our ordered extents we
+ * have outstanding_extents to track the real usage, so we use this to free our
+ * temporarily tracked outstanding_extents. This _must_ be used in conjunction
+ * with btrfs_delalloc_reserve_metadata.
+ */
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ unsigned num_extents;
+
+ spin_lock(&inode->lock);
+ num_extents = count_max_extents(num_bytes);
+ btrfs_mod_outstanding_extents(inode, -num_extents);
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
- btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
- to_free);
+ if (btrfs_is_testing(fs_info))
+ return;
+ btrfs_inode_rsv_release(inode, qgroup_free);
}
/**
@@ -6217,10 +6204,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
* @inode: inode we're releasing space for
* @start: start position of the space already reserved
* @len: the len of the space already reserved
- *
- * This must be matched with a call to btrfs_delalloc_reserve_space. This is
- * called in the case that we don't need the metadata AND data reservations
- * anymore. So if there is an error or we insert an inline extent.
+ * @release_bytes: the len of the space we consumed or didn't use
*
* This function will release the metadata space that was not used and will
* decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
@@ -6228,9 +6212,10 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
* Also it will handle the qgroup reserved space.
*/
void btrfs_delalloc_release_space(struct inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len)
+ struct extent_changeset *reserved,
+ u64 start, u64 len, bool qgroup_free)
{
- btrfs_delalloc_release_metadata(inode, len);
+ btrfs_delalloc_release_metadata(inode, len, qgroup_free);
btrfs_free_reserved_data_space(inode, reserved, start, len);
}
@@ -7149,7 +7134,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head)
goto out_delayed_unlock;
@@ -8396,7 +8381,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
block_rsv_add_bytes(block_rsv, blocksize, 0);
- block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
+ block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
}
/*
@@ -11023,6 +11008,15 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
return ret;
}
+/*
+ * Trim the whole filesystem by:
+ * 1) trimming the free space in each block group
+ * 2) trimming the unallocated space on each device
+ *
+ * This will also continue trimming even if a block group or device encounters
+ * an error. The return value will be the last error, or 0 if nothing bad
+ * happens.
+ */
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -11033,18 +11027,14 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
u64 start;
u64 end;
u64 trimmed = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+ u64 bg_failed = 0;
+ u64 dev_failed = 0;
+ int bg_ret = 0;
+ int dev_ret = 0;
int ret = 0;
- /*
- * try to trim all FS space, our block group may start from non-zero.
- */
- if (range->len == total_bytes)
- cache = btrfs_lookup_first_block_group(fs_info, range->start);
- else
- cache = btrfs_lookup_block_group(fs_info, range->start);
-
- while (cache) {
+ cache = btrfs_lookup_first_block_group(fs_info, range->start);
+ for (; cache; cache = next_block_group(root, cache)) {
if (cache->key.objectid >= (range->start + range->len)) {
btrfs_put_block_group(cache);
break;
@@ -11058,13 +11048,15 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
if (!block_group_cache_done(cache)) {
ret = cache_block_group(cache, 0);
if (ret) {
- btrfs_put_block_group(cache);
- break;
+ bg_failed++;
+ bg_ret = ret;
+ continue;
}
ret = wait_block_group_cache_done(cache);
if (ret) {
- btrfs_put_block_group(cache);
- break;
+ bg_failed++;
+ bg_ret = ret;
+ continue;
}
}
ret = btrfs_trim_block_group(cache,
@@ -11075,27 +11067,40 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
trimmed += group_trimmed;
if (ret) {
- btrfs_put_block_group(cache);
- break;
+ bg_failed++;
+ bg_ret = ret;
+ continue;
}
}
-
- cache = next_block_group(fs_info->tree_root, cache);
}
+ if (bg_failed)
+ btrfs_warn(fs_info,
+ "failed to trim %llu block group(s), last error %d",
+ bg_failed, bg_ret);
+
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
devices = &root->fs_info->fs_devices->alloc_list;
list_for_each_entry(device, devices, dev_alloc_list) {
ret = btrfs_trim_free_extents(device, range->minlen,
&group_trimmed);
- if (ret)
+ if (ret) {
+ dev_failed++;
+ dev_ret = ret;
break;
+ }
trimmed += group_trimmed;
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+ if (dev_failed)
+ btrfs_warn(fs_info,
+ "failed to trim %llu device(s), last error %d",
+ dev_failed, dev_ret);
range->len = trimmed;
+ if (bg_ret)
+ return bg_ret;
return ret;
}
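
The error handling rework above follows a keep-going pattern: count
failures, remember the last error, and only report at the end. A
stripped-down sketch of the same shape (for_each_item(), process() and
warn() are hypothetical placeholders):

	u64 failed = 0;
	int last_err = 0;

	for_each_item(item) {
		int ret = process(item);	/* e.g. trim one block group */
		if (ret) {
			failed++;
			last_err = ret;
			continue;		/* don't abort the whole run */
		}
	}
	if (failed)
		warn("failed on %llu item(s), last error %d", failed, last_err);
	return last_err;
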
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 61b713d71f81..603ebdacaab4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -14,7 +14,7 @@
#define EXTENT_DEFRAG (1U << 6)
#define EXTENT_BOUNDARY (1U << 9)
#define EXTENT_NODATASUM (1U << 10)
-#define EXTENT_DO_ACCOUNTING (1U << 11)
+#define EXTENT_CLEAR_META_RESV (1U << 11)
#define EXTENT_FIRST_DELALLOC (1U << 12)
#define EXTENT_NEED_WAIT (1U << 13)
#define EXTENT_DAMAGED (1U << 14)
@@ -22,6 +22,8 @@
#define EXTENT_QGROUP_RESERVED (1U << 16)
#define EXTENT_CLEAR_DATA_RESV (1U << 17)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
+ EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/*
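
Illustrative only (not part of the patch): after this change,
EXTENT_DO_ACCOUNTING is a composite mask, so existing callers keep their
behavior while new callers can clear one reservation type at a time:

	/* clears both reservations, as before */
	bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
	/* equivalent to */
	bits = EXTENT_DELALLOC | EXTENT_CLEAR_META_RESV | EXTENT_CLEAR_DATA_RESV;
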
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index eaa78b2cea2a..fc60db001368 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1521,7 +1521,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
int ret = 0;
bool only_release_metadata = false;
bool force_page_uptodate = false;
- bool need_unlock;
nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE),
PAGE_CACHE_SIZE / (sizeof(struct page *)));
@@ -1544,6 +1543,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
size_t copied;
size_t dirty_sectors;
size_t num_sectors;
+ int extents_locked;
WARN_ON(num_pages > nrptrs);
@@ -1586,6 +1586,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
}
+ WARN_ON(reserve_bytes == 0);
ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
if (ret) {
if (!only_release_metadata)
@@ -1598,7 +1599,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
release_bytes = reserve_bytes;
- need_unlock = false;
again:
/*
* This is going to setup the pages array with the number of
@@ -1608,19 +1608,23 @@ again:
ret = prepare_pages(inode, pages, num_pages,
pos, write_bytes,
force_page_uptodate);
- if (ret)
+ if (ret) {
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ reserve_bytes, true);
break;
+ }
- ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
- pos, write_bytes, &lockstart,
- &lockend, &cached_state);
- if (ret < 0) {
- if (ret == -EAGAIN)
- goto again;
+ extents_locked = lock_and_cleanup_extent_if_need(
+ inode, pages,
+ num_pages, pos, write_bytes, &lockstart,
+ &lockend, &cached_state);
+ if (extents_locked < 0) {
+ if (extents_locked == -EAGAIN)
+ goto again;
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ reserve_bytes, true);
+ ret = extents_locked;
break;
- } else if (ret > 0) {
- need_unlock = true;
- ret = 0;
}
copied = btrfs_copy_from_user(pos, num_pages,
@@ -1650,28 +1654,15 @@ again:
PAGE_CACHE_SIZE);
}
- /*
- * If we had a short copy we need to release the excess delaloc
- * bytes we reserved. We need to increment outstanding_extents
- * because btrfs_delalloc_release_space and
- * btrfs_delalloc_release_metadata will decrement it, but
- * we still have an outstanding extent for the chunk we actually
- * managed to copy.
- */
if (num_sectors > dirty_sectors) {
/* release everything except the sectors we dirtied */
release_bytes -= dirty_sectors <<
root->fs_info->sb->s_blocksize_bits;
- if (copied > 0) {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
if (only_release_metadata) {
btrfs_delalloc_release_metadata(inode,
- release_bytes);
+ release_bytes, true);
} else {
u64 __pos;
@@ -1679,7 +1670,7 @@ again:
(dirty_pages << PAGE_CACHE_SHIFT);
btrfs_delalloc_release_space(inode,
data_reserved, __pos,
- release_bytes);
+ release_bytes, true);
}
}
@@ -1690,10 +1681,12 @@ again:
ret = btrfs_dirty_pages(root, inode, pages,
dirty_pages, pos, copied,
NULL);
- if (need_unlock)
+ if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state,
GFP_NOFS);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
+ true);
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
@@ -1730,11 +1723,12 @@ again:
if (release_bytes) {
if (only_release_metadata) {
btrfs_end_write_no_snapshoting(root);
- btrfs_delalloc_release_metadata(inode, release_bytes);
+ btrfs_delalloc_release_metadata(inode, release_bytes,
+ true);
} else {
btrfs_delalloc_release_space(inode, data_reserved,
round_down(pos, root->sectorsize),
- release_bytes);
+ release_bytes, true);
}
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 45102b91a999..eb0b40af528b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3545,7 +3545,8 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (ret) {
if (release_metadata)
- btrfs_delalloc_release_metadata(inode, inode->i_size);
+ btrfs_delalloc_release_metadata(inode, inode->i_size,
+ true);
#ifdef DEBUG
btrfs_err(root->fs_info,
"failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 289a9eb9b93e..2bbb5d4448ec 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -500,11 +500,12 @@ again:
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
- btrfs_delalloc_release_metadata(inode, prealloc);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
out_put:
iput(inode);
out_release:
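
Note the qgroup_free flag in the two release calls above; the convention
used throughout this patch is (sketch only, do_work() is a hypothetical
placeholder):

	ret = do_work(inode);
	if (ret) {
		/* error path: free the qgroup prealloc reservation */
		btrfs_delalloc_release_extents(BTRFS_I(inode), bytes, true);
		goto out;
	}
	/* success path: convert the reservation to pertrans instead */
	btrfs_delalloc_release_extents(BTRFS_I(inode), bytes, false);
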
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6f9ea7a47e93..f801e81a4ba1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,6 +43,7 @@
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
#include <linux/uio.h>
+#include <linux/magic.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -68,7 +69,6 @@ struct btrfs_iget_args {
};
struct btrfs_dio_data {
- u64 outstanding_extents;
u64 reserve;
u64 unsubmitted_oe_range_start;
u64 unsubmitted_oe_range_end;
@@ -283,7 +283,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
btrfs_free_path(path);
return PTR_ERR(trans);
}
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+ trans->block_rsv = &BTRFS_I(inode)->block_rsv;
if (compressed_size && compressed_pages)
extent_item_size = btrfs_file_extent_calc_inline_size(
@@ -315,7 +315,6 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
}
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
- btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
/*
@@ -549,16 +548,20 @@ cont:
}
if (ret <= 0) {
unsigned long clear_flags = EXTENT_DELALLOC |
- EXTENT_DEFRAG;
+ EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING;
unsigned long page_error_op;
- clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
/*
* inline extent creation worked or returned error,
* we don't need to create any more async work items.
* Unlock and free up our temp pages.
+ *
+ * We use DO_ACCOUNTING here because we need the
+ * delalloc_release_metadata to be done _after_ we drop
+ * our outstanding extent for clearing delalloc for this
+ * range.
*/
extent_clear_unlock_delalloc(inode, start, end, end,
NULL, clear_flags,
@@ -567,8 +570,6 @@ cont:
PAGE_SET_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
- btrfs_free_reserved_data_space_noquota(inode, start,
- end - start + 1);
goto free_pages_out;
}
}
@@ -945,11 +946,14 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 disk_num_bytes;
- u64 cur_alloc_size;
+ u64 cur_alloc_size = 0;
u64 blocksize = root->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ unsigned clear_bits;
+ unsigned long page_ops;
+ bool extent_reserved = false;
int ret = 0;
if (btrfs_is_free_space_inode(inode)) {
@@ -972,15 +976,19 @@ static noinline int cow_file_range(struct inode *inode,
ret = cow_file_range_inline(root, inode, start, end, 0, 0,
NULL);
if (ret == 0) {
+ /*
+ * We use DO_ACCOUNTING here because we need the
+ * delalloc_release_metadata to be run _after_ we drop
+ * our outstanding extent for clearing delalloc for this
+ * range.
+ */
extent_clear_unlock_delalloc(inode, start, end,
delalloc_end, NULL,
EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DEFRAG, PAGE_UNLOCK |
- PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
- PAGE_END_WRITEBACK);
+ EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
+ PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
- btrfs_free_reserved_data_space_noquota(inode, start,
- end - start + 1);
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
@@ -997,14 +1005,14 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
while (disk_num_bytes > 0) {
- unsigned long op;
-
cur_alloc_size = disk_num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
root->sectorsize, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
+ cur_alloc_size = ins.offset;
+ extent_reserved = true;
em = alloc_extent_map();
if (!em) {
@@ -1040,7 +1048,6 @@ static noinline int cow_file_range(struct inode *inode,
if (ret)
goto out_reserve;
- cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
ram_size, cur_alloc_size, 0);
if (ret)
@@ -1066,18 +1073,19 @@ static noinline int cow_file_range(struct inode *inode,
* Do set the Private2 bit so we know this page was properly
* setup for writepage
*/
- op = unlock ? PAGE_UNLOCK : 0;
- op |= PAGE_SET_PRIVATE2;
+ page_ops = unlock ? PAGE_UNLOCK : 0;
+ page_ops |= PAGE_SET_PRIVATE2;
extent_clear_unlock_delalloc(inode, start,
start + ram_size - 1,
delalloc_end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
- op);
+ page_ops);
disk_num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
+ extent_reserved = false;
}
out:
return ret;
@@ -1088,12 +1096,35 @@ out_reserve:
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_unlock:
+ clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG |
+ EXTENT_CLEAR_META_RESV;
+ page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+ PAGE_END_WRITEBACK;
+ /*
+ * If we reserved an extent for our delalloc range (or a subrange) and
+ * failed to create the respective ordered extent, then it means that
+ * when we reserved the extent we decremented the extent's size from
+ * the data space_info's bytes_may_use counter and incremented the
+ * space_info's bytes_reserved counter by the same amount. We must make
+ * sure extent_clear_unlock_delalloc() does not try to decrement again
+ * the data space_info's bytes_may_use counter, therefore we do not pass
+ * it the flag EXTENT_CLEAR_DATA_RESV.
+ */
+ if (extent_reserved) {
+ extent_clear_unlock_delalloc(inode, start,
+ start + cur_alloc_size,
+ start + cur_alloc_size,
+ locked_page,
+ clear_bits,
+ page_ops);
+ start += cur_alloc_size;
+ if (start >= end)
+ goto out;
+ }
extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
locked_page,
- EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
- EXTENT_DELALLOC | EXTENT_DEFRAG,
- PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
- PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
+ clear_bits | EXTENT_CLEAR_DATA_RESV,
+ page_ops);
goto out;
}
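
A worked example of the two-step clearing above (hypothetical numbers,
shorthand sizes): delalloc range [0, 1M), a 256K extent was reserved and
creating its ordered extent failed. The error path then does:

	/* [0, 256K): extent already reserved, don't touch the data rsv */
	extent_clear_unlock_delalloc(inode, 0, 256K - 1, ..., clear_bits,
				     page_ops);
	/* [256K, 1M): nothing reserved yet, clear the data rsv as well */
	extent_clear_unlock_delalloc(inode, 256K, 1M - 1, ...,
				     clear_bits | EXTENT_CLEAR_DATA_RESV,
				     page_ops);
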
@@ -1247,7 +1278,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
unsigned long *nr_written)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
@@ -1283,30 +1313,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
nolock = btrfs_is_free_space_inode(inode);
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
-
- if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode, start, end, end,
- locked_page,
- EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, PAGE_UNLOCK |
- PAGE_CLEAR_DIRTY |
- PAGE_SET_WRITEBACK |
- PAGE_END_WRITEBACK);
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
-
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
cow_start = (u64)-1;
cur_offset = start;
while (1) {
- ret = btrfs_lookup_file_extent(trans, root, path, ino,
+ ret = btrfs_lookup_file_extent(NULL, root, path, ino,
cur_offset, 0);
if (ret < 0)
goto error;
@@ -1382,7 +1392,7 @@ next_slot:
goto out_check;
if (btrfs_extent_readonly(root, disk_bytenr))
goto out_check;
- ret = btrfs_cross_ref_exist(trans, root, ino,
+ ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr);
if (ret) {
@@ -1559,10 +1569,6 @@ out_check:
}
error:
- err = btrfs_end_transaction(trans, root);
- if (!ret)
- ret = err;
-
if (ret && cur_offset < end)
extent_clear_unlock_delalloc(inode, cur_offset, end, end,
locked_page, EXTENT_LOCKED |
@@ -1634,7 +1640,7 @@ static void btrfs_split_extent_hook(struct inode *inode,
size = orig->end - orig->start + 1;
if (size > BTRFS_MAX_EXTENT_SIZE) {
- u64 num_extents;
+ u32 num_extents;
u64 new_size;
/*
@@ -1642,18 +1648,15 @@ static void btrfs_split_extent_hook(struct inode *inode,
* applies here, just in reverse.
*/
new_size = orig->end - split + 1;
- num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(new_size);
new_size = split - orig->start;
- num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
- if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ num_extents += count_max_extents(new_size);
+ if (count_max_extents(size) >= num_extents)
return;
}
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
}
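
count_max_extents() replaces the open-coded div64_u64() arithmetic removed
above; judging from those removed lines, its definition is effectively:

	static inline u32 count_max_extents(u64 size)
	{
		return div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
				 BTRFS_MAX_EXTENT_SIZE);
	}

i.e. the number of BTRFS_MAX_EXTENT_SIZE-sized extents needed to cover
size, rounded up.
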
@@ -1668,7 +1671,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
struct extent_state *other)
{
u64 new_size, old_size;
- u64 num_extents;
+ u32 num_extents;
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
@@ -1682,7 +1685,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
/* we're not bigger than the max, unreserve the space and go */
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents--;
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
spin_unlock(&BTRFS_I(inode)->lock);
return;
}
@@ -1706,18 +1709,14 @@ static void btrfs_merge_extent_hook(struct inode *inode,
* this case.
*/
old_size = other->end - other->start + 1;
- num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(old_size);
old_size = new->end - new->start + 1;
- num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
-
- if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ num_extents += count_max_extents(old_size);
+ if (count_max_extents(new_size) >= num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents--;
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
spin_unlock(&BTRFS_I(inode)->lock);
}
@@ -1780,15 +1779,12 @@ static void btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ u32 num_extents = count_max_extents(len);
bool do_list = !btrfs_is_free_space_inode(inode);
- if (*bits & EXTENT_FIRST_DELALLOC) {
- *bits &= ~EXTENT_FIRST_DELALLOC;
- } else {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
+ spin_lock(&BTRFS_I(inode)->lock);
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
+ spin_unlock(&BTRFS_I(inode)->lock);
/* For sanity tests */
if (btrfs_test_is_dummy_root(root))
@@ -1815,8 +1811,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
unsigned *bits)
{
u64 len = state->end + 1 - state->start;
- u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
- BTRFS_MAX_EXTENT_SIZE);
+ u32 num_extents = count_max_extents(len);
spin_lock(&BTRFS_I(inode)->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1832,31 +1827,26 @@ static void btrfs_clear_bit_hook(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
bool do_list = !btrfs_is_free_space_inode(inode);
- if (*bits & EXTENT_FIRST_DELALLOC) {
- *bits &= ~EXTENT_FIRST_DELALLOC;
- } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents -= num_extents;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
+ spin_lock(&BTRFS_I(inode)->lock);
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), -num_extents);
+ spin_unlock(&BTRFS_I(inode)->lock);
/*
* We don't reserve metadata space for space cache inodes so we
* don't need to call dellalloc_release_metadata if there is an
* error.
*/
- if (*bits & EXTENT_DO_ACCOUNTING &&
+ if (*bits & EXTENT_CLEAR_META_RESV &&
root != root->fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, len);
+ btrfs_delalloc_release_metadata(inode, len, false);
/* For sanity tests. */
if (btrfs_test_is_dummy_root(root))
return;
- if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
- && do_list && !(state->state & EXTENT_NORESERVE)
- && (*bits & (EXTENT_DO_ACCOUNTING |
- EXTENT_CLEAR_DATA_RESV)))
+ if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
+ do_list && !(state->state & EXTENT_NORESERVE) &&
+ (*bits & EXTENT_CLEAR_DATA_RESV))
btrfs_free_reserved_data_space_noquota(inode,
state->start, len);
@@ -2106,6 +2096,7 @@ again:
ClearPageChecked(page);
set_page_dirty(page);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -2958,7 +2949,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans = NULL;
goto out;
}
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+ trans->block_rsv = &BTRFS_I(inode)->block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
@@ -2993,7 +2984,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out_unlock;
}
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+ trans->block_rsv = &BTRFS_I(inode)->block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compress_type = ordered_extent->compress_type;
@@ -3042,8 +3033,6 @@ out_unlock:
ordered_extent->file_offset +
ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
- if (root != root->fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, ordered_extent->len);
if (trans)
btrfs_end_transaction(trans, root);
@@ -4776,8 +4765,11 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
(!len || ((len & (blocksize - 1)) == 0)))
goto out;
+ block_start = round_down(from, blocksize);
+ block_end = block_start + blocksize - 1;
+
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
- round_down(from, blocksize), blocksize);
+ block_start, blocksize);
if (ret)
goto out;
@@ -4785,15 +4777,12 @@ again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, data_reserved,
- round_down(from, blocksize),
- blocksize);
+ block_start, blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
ret = -ENOMEM;
goto out;
}
- block_start = round_down(from, blocksize);
- block_end = block_start + blocksize - 1;
-
if (!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
lock_page(page);
@@ -4857,7 +4846,8 @@ again:
out_unlock:
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, block_start,
- blocksize);
+ blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
unlock_page(page);
page_cache_release(page);
out:
@@ -7319,7 +7309,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes)
{
- struct btrfs_trans_handle *trans;
struct btrfs_path *path;
int ret;
struct extent_buffer *leaf;
@@ -7421,15 +7410,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
* look for other files referencing this extent, if we
* find any we must cow
*/
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = 0;
- goto out;
- }
-
- ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+ ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
key.offset - backref_offset, disk_bytenr);
- btrfs_end_transaction(trans, root);
if (ret) {
ret = 0;
goto out;
@@ -7653,35 +7635,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
return em;
}
-static void adjust_dio_outstanding_extents(struct inode *inode,
- struct btrfs_dio_data *dio_data,
- const u64 len)
-{
- unsigned num_extents;
-
- num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
- /*
- * If we have an outstanding_extents count still set then we're
- * within our reservation, otherwise we need to adjust our inode
- * counter appropriately.
- */
- if (dio_data->outstanding_extents >= num_extents) {
- dio_data->outstanding_extents -= num_extents;
- } else {
- /*
- * If dio write length has been split due to no large enough
- * contiguous space, we need to compensate our inode counter
- * appropriately.
- */
- u64 num_needed = num_extents - dio_data->outstanding_extents;
-
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents += num_needed;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
-}
-
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
@@ -7843,7 +7796,6 @@ unlock:
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
- adjust_dio_outstanding_extents(inode, dio_data, len);
WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
dio_data->unsubmitted_oe_range_end = start + len;
@@ -7873,14 +7825,6 @@ unlock_err:
err:
if (dio_data)
current->journal_info = dio_data;
- /*
- * Compensate the delalloc release we do in btrfs_direct_IO() when we
- * write less data then expected, so that we don't underflow our inode's
- * outstanding extents counter.
- */
- if (create && dio_data)
- adjust_dio_outstanding_extents(inode, dio_data, len);
-
return ret;
}
@@ -8729,9 +8673,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
offset, count);
if (ret)
goto out;
- dio_data.outstanding_extents = div64_u64(count +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
/*
* We need to know how many extents we reserved so that we can
@@ -8758,7 +8699,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
btrfs_delalloc_release_space(inode, data_reserved,
- offset, dio_data.reserve);
+ offset, dio_data.reserve, true);
/*
* On error we might have left some ordered extents
* without submitting corresponding bios for them, so
@@ -8774,7 +8715,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
0);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, data_reserved,
- offset, count - (size_t)ret);
+ offset, count - (size_t)ret, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
}
out:
if (wakeup)
@@ -9089,11 +9031,9 @@ again:
reserved_space = round_up(size - page_start, root->sectorsize);
if (reserved_space < PAGE_CACHE_SIZE) {
end = page_start + reserved_space - 1;
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, data_reserved,
- page_start, PAGE_CACHE_SIZE - reserved_space);
+ page_start, PAGE_CACHE_SIZE - reserved_space,
+ true);
}
}
@@ -9143,14 +9083,16 @@ again:
out_unlock:
if (!ret) {
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
}
unlock_page(page);
out:
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
btrfs_delalloc_release_space(inode, data_reserved, page_start,
- reserved_space);
+ reserved_space, (ret != 0));
out_noreserve:
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
@@ -9335,6 +9277,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_inode *ei;
struct inode *inode;
@@ -9360,8 +9303,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->lock);
ei->outstanding_extents = 0;
- ei->reserved_extents = 0;
-
+ if (sb->s_magic != BTRFS_TEST_MAGIC)
+ btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
+ BTRFS_BLOCK_RSV_DELALLOC);
ei->runtime_flags = 0;
ei->force_compress = BTRFS_COMPRESS_NONE;
@@ -9409,8 +9353,9 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!hlist_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
+ WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
+ WARN_ON(BTRFS_I(inode)->block_rsv.size);
WARN_ON(BTRFS_I(inode)->outstanding_extents);
- WARN_ON(BTRFS_I(inode)->reserved_extents);
WARN_ON(BTRFS_I(inode)->delalloc_bytes);
WARN_ON(BTRFS_I(inode)->csum_bytes);
WARN_ON(BTRFS_I(inode)->defrag_bytes);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f1c95c2a115f..50b102e977f6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -379,7 +379,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
struct fstrim_range range;
u64 minlen = ULLONG_MAX;
u64 num_devices = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
int ret;
if (!capable(CAP_SYS_ADMIN))
@@ -403,11 +402,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
return -EOPNOTSUPP;
if (copy_from_user(&range, arg, sizeof(range)))
return -EFAULT;
- if (range.start > total_bytes ||
- range.len < fs_info->sb->s_blocksize)
+
+ /*
+	 * NOTE: Don't truncate the range using super->total_bytes.  The
+	 * bytenr of a block group is in the logical address space, which can
+	 * be any sectorsize-aligned bytenr in the range [0, U64_MAX].
+ */
+ if (range.len < fs_info->sb->s_blocksize)
return -EINVAL;
- range.len = min(range.len, total_bytes - range.start);
range.minlen = max(range.minlen, minlen);
ret = btrfs_trim_fs(fs_info->tree_root, &range);
if (ret < 0)
@@ -1228,11 +1231,11 @@ again:
if (i_done != page_cnt) {
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_CACHE_SHIFT,
- (page_cnt - i_done) << PAGE_CACHE_SHIFT);
+ (page_cnt - i_done) << PAGE_CACHE_SHIFT, true);
}
@@ -1251,6 +1254,8 @@ again:
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+ false);
extent_changeset_free(data_reserved);
return i_done;
out:
@@ -1260,7 +1265,9 @@ out:
}
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_CACHE_SHIFT,
- page_cnt << PAGE_CACHE_SHIFT);
+ page_cnt << PAGE_CACHE_SHIFT, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+ true);
extent_changeset_free(data_reserved);
return ret;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ac134d0a96f9..82d7833f0793 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -241,6 +241,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
}
spin_unlock(&root->ordered_extent_lock);
+ /*
+ * We don't need the count_max_extents here, we can assume that all of
+ * that work has been done at higher layers, so this is truly the
+ * smallest the extent is going to get.
+ */
+ spin_lock(&BTRFS_I(inode)->lock);
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
+ spin_unlock(&BTRFS_I(inode)->lock);
+
return 0;
}
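
Together with the btrfs_remove_ordered_extent() hunk below, this gives each
ordered extent exactly one outstanding extent for its whole lifetime. The
pairing, condensed:

	/* creation: __btrfs_add_ordered_extent() */
	btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);

	/* completion: btrfs_remove_ordered_extent() */
	btrfs_mod_outstanding_extents(btrfs_inode, -1);
	if (root != root->fs_info->tree_root)
		btrfs_delalloc_release_metadata(inode, entry->len, false);
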
@@ -588,11 +597,19 @@ void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+ struct btrfs_root *root = btrfs_inode->root;
struct rb_node *node;
bool dec_pending_ordered = false;
- tree = &BTRFS_I(inode)->ordered_tree;
+ /* This is paired with btrfs_add_ordered_extent. */
+ spin_lock(&btrfs_inode->lock);
+ btrfs_mod_outstanding_extents(btrfs_inode, -1);
+ spin_unlock(&btrfs_inode->lock);
+ if (root != root->fs_info->tree_root)
+ btrfs_delalloc_release_metadata(inode, entry->len, false);
+
+ tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b3d0c1740335..61680d7da540 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>
+#include <linux/sizes.h>
#include "ctree.h"
#include "transaction.h"
@@ -48,48 +49,80 @@
*/
/*
- * one struct for each qgroup, organized in fs_info->qgroup_tree.
+ * Helpers to access qgroup reservation
+ *
+ * Callers should ensure the lock context and type are valid
*/
-struct btrfs_qgroup {
- u64 qgroupid;
- /*
- * state
- */
- u64 rfer; /* referenced */
- u64 rfer_cmpr; /* referenced compressed */
- u64 excl; /* exclusive */
- u64 excl_cmpr; /* exclusive compressed */
+static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
+{
+ u64 ret = 0;
+ int i;
- /*
- * limits
- */
- u64 lim_flags; /* which limits are set */
- u64 max_rfer;
- u64 max_excl;
- u64 rsv_rfer;
- u64 rsv_excl;
+ for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+ ret += qgroup->rsv.values[i];
- /*
- * reservation tracking
- */
- u64 reserved;
+ return ret;
+}
- /*
- * lists
- */
- struct list_head groups; /* groups this group is member of */
- struct list_head members; /* groups that are members of this group */
- struct list_head dirty; /* dirty groups */
- struct rb_node node; /* tree of qgroups */
+#ifdef CONFIG_BTRFS_DEBUG
+static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
+{
+ if (type == BTRFS_QGROUP_RSV_DATA)
+ return "data";
+ if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
+ return "meta_pertrans";
+ if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
+ return "meta_prealloc";
+ return NULL;
+}
+#endif
- /*
- * temp variables for accounting operations
- * Refer to qgroup_shared_accouting() for details.
- */
- u64 old_refcnt;
- u64 new_refcnt;
-};
+static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type)
+{
+ trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
+ qgroup->rsv.values[type] += num_bytes;
+}
+
+static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type)
+{
+ trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
+ if (qgroup->rsv.values[type] >= num_bytes) {
+ qgroup->rsv.values[type] -= num_bytes;
+ return;
+ }
+#ifdef CONFIG_BTRFS_DEBUG
+ WARN_RATELIMIT(1,
+ "qgroup %llu %s reserved space underflow, have %llu to free %llu",
+ qgroup->qgroupid, qgroup_rsv_type_str(type),
+ qgroup->rsv.values[type], num_bytes);
+#endif
+ qgroup->rsv.values[type] = 0;
+}
+
+static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *dest,
+ struct btrfs_qgroup *src)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+ qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
+}
+
+static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *dest,
+ struct btrfs_qgroup *src)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+ qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
+}
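
A quick worked example of the clamping in qgroup_rsv_release() (hypothetical
numbers): if rsv.values[BTRFS_QGROUP_RSV_DATA] is 4K and 8K is released, the
counter does not wrap around; it warns (under CONFIG_BTRFS_DEBUG) and is
reset instead:

	qgroup_rsv_release(fs_info, qg, SZ_8K, BTRFS_QGROUP_RSV_DATA);
	/* values[BTRFS_QGROUP_RSV_DATA] is now 0, not a huge u64 */
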
static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
int mod)
@@ -1031,33 +1064,29 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
-static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup *qgroup,
- u64 num_bytes)
-{
-#ifdef CONFIG_BTRFS_DEBUG
- WARN_ON(qgroup->reserved < num_bytes);
- btrfs_debug(fs_info,
- "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
- qgroup->qgroupid, qgroup->reserved, num_bytes);
-#endif
- qgroup->reserved = 0;
-}
/*
- * The easy accounting, if we are adding/removing the only ref for an extent
- * then this qgroup and all of the parent qgroups get their refrence and
- * exclusive counts adjusted.
+ * The easy accounting, we're updating qgroup relationship whose child qgroup
+ * only has exclusive extents.
+ *
+ * In this case, all exclusive extents will also be exclusive for parent, so
+ * excl/rfer just get added/removed.
+ *
+ * The same applies to qgroup reservation space, which should also be
+ * added to/removed from the parent.
+ * Otherwise, when the child releases reservation space, the parent would
+ * underflow its reservation (in the relationship-adding case).
*
* Caller should hold fs_info->qgroup_lock.
*/
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
struct ulist *tmp, u64 ref_root,
- u64 num_bytes, int sign)
+ struct btrfs_qgroup *src, int sign)
{
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup_list *glist;
struct ulist_node *unode;
struct ulist_iterator uiter;
+ u64 num_bytes = src->excl;
int ret = 0;
qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -1070,12 +1099,11 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
- if (sign > 0) {
- if (qgroup->reserved < num_bytes)
- report_reserved_underflow(fs_info, qgroup, num_bytes);
- else
- qgroup->reserved -= num_bytes;
- }
+
+ if (sign > 0)
+ qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
+ else
+ qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
qgroup_dirty(fs_info, qgroup);
@@ -1095,13 +1123,10 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup->rfer_cmpr += sign * num_bytes;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
- if (sign > 0) {
- if (qgroup->reserved < num_bytes)
- report_reserved_underflow(fs_info, qgroup,
- num_bytes);
- else
- qgroup->reserved -= num_bytes;
- }
+ if (sign > 0)
+ qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
+ else
+ qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
qgroup->excl_cmpr += sign * num_bytes;
qgroup_dirty(fs_info, qgroup);
@@ -1144,7 +1169,7 @@ static int quick_update_accounting(struct btrfs_fs_info *fs_info,
if (qgroup->excl == qgroup->rfer) {
ret = 0;
err = __qgroup_excl_accounting(fs_info, tmp, dst,
- qgroup->excl, sign);
+ qgroup, sign);
if (err < 0) {
ret = err;
goto out;
@@ -1440,37 +1465,6 @@ out:
return ret;
}
-int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_qgroup_extent_record *record;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
- u64 qgroup_to_skip;
- int ret = 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
- qgroup_to_skip = delayed_refs->qgroup_to_skip;
-
- /*
- * No need to do lock, since this function will only be called in
- * btrfs_commmit_transaction().
- */
- node = rb_first(&delayed_refs->dirty_extent_root);
- while (node) {
- record = rb_entry(node, struct btrfs_qgroup_extent_record,
- node);
- ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
- &record->old_roots);
- if (ret < 0)
- break;
- if (qgroup_to_skip)
- ulist_del(record->old_roots, qgroup_to_skip, 0);
- node = rb_next(node);
- }
- return ret;
-}
-
int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_qgroup_extent_record *record)
@@ -1931,6 +1925,35 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
return 0;
}
+/*
+ * Check if @roots could be a list of fs tree roots
+ *
+ * Return 0 if it is definitely not a list of fs/subvol tree roots
+ * Return 1 if there may be fs/subvol tree roots in the list (an empty one
+ * counts as well)
+ */
+static int maybe_fs_roots(struct ulist *roots)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+
+ /* Empty one, still possible for fs roots */
+ if (!roots || roots->nnodes == 0)
+ return 1;
+
+ ULIST_ITER_INIT(&uiter);
+ unode = ulist_next(roots, &uiter);
+ if (!unode)
+ return 1;
+
+ /*
+ * If it contains fs tree roots, then it must belong to fs/subvol
+ * trees.
+ * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
+ */
+ return is_fstree(unode->val);
+}
+
int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
@@ -1944,10 +1967,20 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
u64 nr_old_roots = 0;
int ret = 0;
- if (new_roots)
+ if (new_roots) {
+ if (!maybe_fs_roots(new_roots))
+ goto out_free;
nr_new_roots = new_roots->nnodes;
- if (old_roots)
+ }
+ if (old_roots) {
+ if (!maybe_fs_roots(old_roots))
+ goto out_free;
nr_old_roots = old_roots->nnodes;
+ }
+
+ /* Quick exit, either not fs tree roots, or won't affect any qgroup */
+ if (nr_old_roots == 0 && nr_new_roots == 0)
+ goto out_free;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
goto out_free;
@@ -2028,6 +2061,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
trace_btrfs_qgroup_account_extents(fs_info, record);
if (!ret) {
+ /*
+ * Old roots should be searched when inserting qgroup
+ * extent record
+ */
+ if (!record->old_roots) {
+ /* Search commit root to find old_roots */
+ ret = btrfs_find_all_roots(NULL, fs_info,
+ record->bytenr, 0,
+ &record->old_roots);
+ if (ret < 0)
+ goto cleanup;
+ }
+
/*
* Use (u64)-1 as time_seq to do special search, which
* doesn't lock tree or delayed_refs and search current
@@ -2037,8 +2083,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
record->bytenr, (u64)-1, &new_roots);
if (ret < 0)
goto cleanup;
- if (qgroup_to_skip)
+ if (qgroup_to_skip) {
ulist_del(new_roots, qgroup_to_skip, 0);
+ ulist_del(record->old_roots, qgroup_to_skip,
+ 0);
+ }
ret = btrfs_qgroup_account_extent(trans, fs_info,
record->bytenr, record->num_bytes,
record->old_roots, new_roots);
@@ -2324,28 +2373,65 @@ out:
return ret;
}
-static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+/*
+ * Two limits for committing a transaction in advance.
+ *
+ * For RATIO, the threshold is 1/RATIO of the remaining limit
+ * (excluding data and prealloc meta).
+ * For SIZE, the threshold is given directly in bytes.
+ */
+#define QGROUP_PERTRANS_RATIO 32
+#define QGROUP_PERTRANS_SIZE SZ_32M
+static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
+ const struct btrfs_qgroup *qg, u64 num_bytes)
{
+ u64 limit;
+ u64 threshold;
+
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
- qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
+ qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
return false;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
- qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
+ qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
return false;
+ /*
+	 * Even if we passed the check, it's better to check whether the
+	 * meta_pertrans reservation is pushing us near the limit.
+	 * If there is too much pertrans reservation, or it's near the limit,
+	 * try to commit a transaction to free some of it, using the
+	 * transaction_kthread.
+ */
+ if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
+ BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
+ if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+ limit = qg->max_excl;
+ else
+ limit = qg->max_rfer;
+ threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
+ qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
+ QGROUP_PERTRANS_RATIO;
+ threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);
+
+ /*
+		 * Use the transaction_kthread to commit the transaction, so
+		 * we no longer need to worry about nested transactions or
+		 * lock context.
+ */
+ if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
+ btrfs_commit_transaction_locksafe(fs_info);
+ }
+
return true;
}
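
A worked example of the threshold math (hypothetical numbers): with
max_excl = 1GiB, 256MiB of data reservation and 64MiB of meta_prealloc
reservation:

	threshold = (1GiB - 256MiB - 64MiB) / QGROUP_PERTRANS_RATIO
	          = 704MiB / 32 = 22MiB
	threshold = min(22MiB, QGROUP_PERTRANS_SIZE) = min(22MiB, 32MiB)
	          = 22MiB

so once the meta_pertrans reservation exceeds 22MiB, an asynchronous commit
is kicked off via btrfs_commit_transaction_locksafe().
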
-
-static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
+static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
+ enum btrfs_qgroup_rsv_type type)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 ref_root = root->root_key.objectid;
int ret = 0;
- int retried = 0;
struct ulist_node *unode;
struct ulist_iterator uiter;
@@ -2354,7 +2440,6 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
if (num_bytes == 0)
return 0;
-retry:
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
if (!quota_root)
@@ -2380,28 +2465,7 @@ retry:
qg = u64_to_ptr(unode->aux);
- if (enforce && !qgroup_check_limits(qg, num_bytes)) {
- /*
- * Commit the tree and retry, since we may have
- * deletions which would free up space.
- */
- if (!retried && qg->reserved > 0) {
- struct btrfs_trans_handle *trans;
-
- spin_unlock(&fs_info->qgroup_lock);
- ret = btrfs_start_delalloc_inodes(root, 0);
- if (ret)
- return ret;
- btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- retried++;
- goto retry;
- }
+ if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
ret = -EDQUOT;
goto out;
}
@@ -2424,7 +2488,8 @@ retry:
qg = u64_to_ptr(unode->aux);
- qg->reserved += num_bytes;
+ trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
+ qgroup_rsv_add(fs_info, qg, num_bytes, type);
}
out:
@@ -2432,8 +2497,18 @@ out:
return ret;
}
+/*
+ * Free @num_bytes of reserved space with @type for the qgroup (normally a
+ * level-0 qgroup).
+ *
+ * Will handle all higher level qgroup too.
+ *
+ * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
+ * This special case is only used for META_PERTRANS type.
+ */
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
- u64 ref_root, u64 num_bytes)
+ u64 ref_root, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
@@ -2447,6 +2522,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
if (num_bytes == 0)
return;
+ if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
+ WARN(1, "%s: Invalid type to free", __func__);
+ return;
+ }
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
@@ -2457,6 +2536,13 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
if (!qgroup)
goto out;
+ if (num_bytes == (u64)-1)
+ /*
+ * We're freeing all pertrans rsv, get reserved value from
+ * level 0 qgroup as real num_bytes to free.
+ */
+ num_bytes = qgroup->rsv.values[type];
+
ulist_reinit(fs_info->qgroup_ulist);
ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
(uintptr_t)qgroup, GFP_ATOMIC);
@@ -2469,10 +2555,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
qg = u64_to_ptr(unode->aux);
- if (qg->reserved < num_bytes)
- report_reserved_underflow(fs_info, qg, num_bytes);
- else
- qg->reserved -= num_bytes;
+ trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
+ qgroup_rsv_release(fs_info, qg, num_bytes, type);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
@@ -2487,11 +2571,6 @@ out:
spin_unlock(&fs_info->qgroup_lock);
}
-static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes)
-{
- return btrfs_qgroup_free_refroot(root->fs_info, root->objectid,
- num_bytes);
-}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
@@ -2898,7 +2977,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
to_reserve, QGROUP_RESERVE);
if (ret < 0)
goto cleanup;
- ret = qgroup_reserve(root, to_reserve, true);
+ ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
if (ret < 0)
goto cleanup;
@@ -2962,7 +3041,8 @@ static int qgroup_free_reserved_data(struct inode *inode,
goto out;
freed += changeset.bytes_changed;
}
- btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
+ btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
+ BTRFS_QGROUP_RSV_DATA);
ret = freed;
out:
extent_changeset_release(&changeset);
@@ -2987,16 +3067,14 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
if (ret < 0)
goto out;
- if (free) {
- qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+ if (free)
trace_op = QGROUP_FREE;
- }
trace_btrfs_qgroup_release_data(inode, start, len,
changeset.bytes_changed, trace_op);
if (free)
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->objectid,
- changeset.bytes_changed);
+ changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
ret = changeset.bytes_changed;
out:
extent_changeset_release(&changeset);
@@ -3041,8 +3119,48 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
- bool enforce)
+static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type)
+{
+ if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
+ type != BTRFS_QGROUP_RSV_META_PERTRANS)
+ return;
+ if (num_bytes == 0)
+ return;
+
+ spin_lock(&root->qgroup_meta_rsv_lock);
+ if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
+ root->qgroup_meta_rsv_prealloc += num_bytes;
+ else
+ root->qgroup_meta_rsv_pertrans += num_bytes;
+ spin_unlock(&root->qgroup_meta_rsv_lock);
+}
+
+static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type)
+{
+ if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
+ type != BTRFS_QGROUP_RSV_META_PERTRANS)
+ return 0;
+ if (num_bytes == 0)
+ return 0;
+
+ spin_lock(&root->qgroup_meta_rsv_lock);
+ if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
+ num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
+ num_bytes);
+ root->qgroup_meta_rsv_prealloc -= num_bytes;
+ } else {
+ num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
+ num_bytes);
+ root->qgroup_meta_rsv_pertrans -= num_bytes;
+ }
+ spin_unlock(&root->qgroup_meta_rsv_lock);
+ return num_bytes;
+}
+
+int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type, bool enforce)
{
int ret;
@@ -3051,37 +3169,119 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
return 0;
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
- ret = qgroup_reserve(root, num_bytes, enforce);
+ trace_qgroup_meta_reserve(root, type, (s64)num_bytes);
+ ret = qgroup_reserve(root, num_bytes, enforce, type);
if (ret < 0)
return ret;
- atomic64_add(num_bytes, &root->qgroup_meta_rsv);
+ /*
+	 * Record what we have reserved in the root.
+	 *
+	 * This avoids an underflow across a quota disabled->enabled cycle:
+	 * in that case we may try to free space we never reserved (since
+	 * quota was disabled), so record what we reserved in the root and
+	 * ensure that a later release won't underflow this number.
+ */
+ add_root_meta_rsv(root, num_bytes, type);
return ret;
}
-void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
+void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
{
- u64 reserved;
+ struct btrfs_fs_info *fs_info = root->fs_info;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
!is_fstree(root->objectid))
return;
- reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
- if (reserved == 0)
- return;
- qgroup_free(root, reserved);
+ /* TODO: Update trace point to handle such free */
+ trace_qgroup_meta_free_all_pertrans(root);
+ /* Special value -1 means to free all reserved space */
+ btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1,
+ BTRFS_QGROUP_RSV_META_PERTRANS);
}
-void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
+void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type)
{
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
!is_fstree(root->objectid))
return;
+ /*
+	 * A reservation for META_PREALLOC can happen before quota is
+	 * enabled, which can lead to underflow.
+	 * Make sure here that we only free what we really have reserved.
+ */
+ num_bytes = sub_root_meta_rsv(root, num_bytes, type);
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
- WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
- atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
- qgroup_free(root, num_bytes);
+ trace_qgroup_meta_reserve(root, type, -(s64)num_bytes);
+ btrfs_qgroup_free_refroot(root->fs_info, root->objectid, num_bytes,
+ type);
+}
+
+static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
+{
+ return (struct btrfs_qgroup *)(uintptr_t)n->aux;
+}
+
+static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
+ int num_bytes)
+{
+ struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_qgroup *qgroup;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ int ret = 0;
+
+ if (num_bytes == 0)
+ return;
+ if (!quota_root)
+ return;
+
+ spin_lock(&fs_info->qgroup_lock);
+ qgroup = find_qgroup_rb(fs_info, ref_root);
+ if (!qgroup)
+ goto out;
+ ulist_reinit(fs_info->qgroup_ulist);
+ ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
+ (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+
+ qg = unode_aux_to_qgroup(unode);
+
+ qgroup_rsv_release(fs_info, qg, num_bytes,
+ BTRFS_QGROUP_RSV_META_PREALLOC);
+ qgroup_rsv_add(fs_info, qg, num_bytes,
+ BTRFS_QGROUP_RSV_META_PERTRANS);
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(fs_info->qgroup_ulist,
+ glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
+ }
+ }
+out:
+ spin_unlock(&fs_info->qgroup_lock);
+}
+
+void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
+ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
+ !is_fstree(root->objectid))
+ return;
+ /* Same as btrfs_qgroup_free_meta_prealloc() */
+ num_bytes = sub_root_meta_rsv(root, num_bytes,
+ BTRFS_QGROUP_RSV_META_PREALLOC);
+ trace_qgroup_meta_convert(root, num_bytes);
+ qgroup_convert_meta(fs_info, root->objectid, num_bytes);
}
/*
@@ -3107,7 +3307,10 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
inode->i_ino, unode->val, unode->aux);
}
- qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+ btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+ BTRFS_I(inode)->root->objectid,
+ changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
+
}
extent_changeset_release(&changeset);
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 31b5709bfd1e..b8a6561922cf 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -34,6 +34,92 @@ struct btrfs_qgroup_extent_record {
};
/*
+ * Qgroup reservation types:
+ *
+ * DATA:
+ * space reserved for data
+ *
+ * META_PERTRANS:
+ * Space reserved for metadata (per-transaction)
+ * Because qgroup data is only updated at transaction commit time,
+ * reserved metadata space must be kept until the transaction commits.
+ * Any metadata reservation used in btrfs_start_transaction() should be of
+ * this type.
+ *
+ * META_PREALLOC:
+ * There are cases where metadata space is reserved before starting a
+ * transaction, and btrfs_join_transaction() is only called later to get a
+ * trans handle.
+ * Any metadata reserved for such usage should be of this type.
+ * After join_transaction(), part (or all) of such a reservation should be
+ * converted into META_PERTRANS.
+ */
+enum btrfs_qgroup_rsv_type {
+ BTRFS_QGROUP_RSV_DATA = 0,
+ BTRFS_QGROUP_RSV_META_PERTRANS,
+ BTRFS_QGROUP_RSV_META_PREALLOC,
+ BTRFS_QGROUP_RSV_LAST,
+};
+
+/*
+ * Represents how many bytes we have reserved for this qgroup.
+ *
+ * Each type has its own reservation behavior.
+ * E.g. data follows its io_tree flag modification, while
+ * *currently* meta is just reserve-and-clear during a transaction.
+ *
+ * TODO: Add a new type for reservations which can survive a transaction
+ * commit.  The current metadata reservation behavior is not suitable for
+ * such a case.
+ */
+struct btrfs_qgroup_rsv {
+ u64 values[BTRFS_QGROUP_RSV_LAST];
+};
+
+/*
+ * one struct for each qgroup, organized in fs_info->qgroup_tree.
+ */
+struct btrfs_qgroup {
+ u64 qgroupid;
+
+ /*
+ * state
+ */
+ u64 rfer; /* referenced */
+ u64 rfer_cmpr; /* referenced compressed */
+ u64 excl; /* exclusive */
+ u64 excl_cmpr; /* exclusive compressed */
+
+ /*
+ * limits
+ */
+ u64 lim_flags; /* which limits are set */
+ u64 max_rfer;
+ u64 max_excl;
+ u64 rsv_rfer;
+ u64 rsv_excl;
+
+ /*
+ * reservation tracking
+ */
+ struct btrfs_qgroup_rsv rsv;
+
+ /*
+ * lists
+ */
+ struct list_head groups; /* groups this group is member of */
+ struct list_head members; /* groups that are members of this group */
+ struct list_head dirty; /* dirty groups */
+ struct rb_node node; /* tree of qgroups */
+
+ /*
+ * temp variables for accounting operations
+ * Refer to qgroup_shared_accounting() for details.
+ */
+ u64 old_refcnt;
+ u64 new_refcnt;
+};
+
+/*
* For qgroup event trace points only
*/
#define QGROUP_RESERVE (1<<0)
@@ -62,8 +148,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
-int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+
/*
* Inform qgroup to trace one dirty extent, its info is recorded in @record.
* So qgroup can account it at commit trans time.
@@ -130,7 +215,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
- u64 ref_root, u64 num_bytes);
+ u64 ref_root, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type);
/*
* TODO: Add proper trace point for it, as btrfs_qgroup_free() is
* called by everywhere, can't provide good trace for delayed ref case.
@@ -138,7 +224,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
{
- btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
+ btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
+ BTRFS_QGROUP_RSV_DATA);
trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
@@ -155,9 +242,54 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
- bool enforce);
-void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
-void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
+int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type, bool enforce);
+/* Reserve metadata space for pertrans and prealloc types */
+static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
+ int num_bytes, bool enforce)
+{
+ return __btrfs_qgroup_reserve_meta(root, num_bytes,
+ BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
+}
+static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
+ int num_bytes, bool enforce)
+{
+ return __btrfs_qgroup_reserve_meta(root, num_bytes,
+ BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
+}
+
+void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type);
+
+/* Free per-transaction meta reservation for error handling */
+static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
+ int num_bytes)
+{
+ __btrfs_qgroup_free_meta(root, num_bytes,
+ BTRFS_QGROUP_RSV_META_PERTRANS);
+}
+
+/* Pre-allocated meta reservation can be freed whenever needed */
+static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
+ int num_bytes)
+{
+ __btrfs_qgroup_free_meta(root, num_bytes,
+ BTRFS_QGROUP_RSV_META_PREALLOC);
+}
+
+/*
+ * Per-transaction meta reservation should be all freed at transaction commit
+ * time
+ */
+void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
+
+/*
+ * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
+ *
+ * This is called when a preallocated meta reservation needs to be used,
+ * normally after a btrfs_join_transaction() call.
+ */
+void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
+
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
#endif /* __BTRFS_QGROUP__ */
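Reader's note, not part of the patch: the API above is meant to be used as
prealloc-then-convert. A minimal sketch of the intended call pattern, where
example_prealloc_then_convert() is a hypothetical caller and the btrfs_*
helpers are the ones declared above:

static int example_prealloc_then_convert(struct btrfs_root *root, int num_bytes)
{
	struct btrfs_trans_handle *trans;
	int ret;

	/* No transaction handle yet, so reserve as META_PREALLOC */
	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
	if (ret)
		return ret;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		/* Prealloc reservations can be freed directly on error */
		btrfs_qgroup_free_meta_prealloc(root, num_bytes);
		return PTR_ERR(trans);
	}

	/*
	 * The reservation now belongs to this transaction; convert it to
	 * META_PERTRANS so btrfs_qgroup_free_meta_all_pertrans() releases
	 * it in bulk at commit time.
	 */
	btrfs_qgroup_convert_reserved_meta(root, num_bytes);

	return btrfs_end_transaction(trans, root);
}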
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f00520a6d712..f10a075fe61d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3231,7 +3231,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
mask);
if (!page) {
btrfs_delalloc_release_metadata(inode,
- PAGE_CACHE_SIZE);
+ PAGE_CACHE_SIZE, true);
ret = -ENOMEM;
goto out;
}
@@ -3250,7 +3250,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
unlock_page(page);
page_cache_release(page);
btrfs_delalloc_release_metadata(inode,
- PAGE_CACHE_SIZE);
+ PAGE_CACHE_SIZE, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ PAGE_SIZE, true);
ret = -EIO;
goto out;
}
@@ -3280,6 +3282,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
page_cache_release(page);
index++;
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
+ false);
balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_throttle(BTRFS_I(inode)->root);
}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 4c0575096909..47533525bb3b 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -939,7 +939,6 @@ static int test_extent_accounting(void)
btrfs_test_inode_set_ops(inode);
/* [BTRFS_MAX_EXTENT_SIZE] */
- BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
NULL, 0);
if (ret) {
@@ -954,7 +953,6 @@ static int test_extent_accounting(void)
}
/* [BTRFS_MAX_EXTENT_SIZE][4k] */
- BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
BTRFS_MAX_EXTENT_SIZE + 4095, NULL, 0);
if (ret) {
@@ -973,7 +971,7 @@ static int test_extent_accounting(void)
BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
EXTENT_DELALLOC | EXTENT_DIRTY |
- EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
+ EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
@@ -987,7 +985,6 @@ static int test_extent_accounting(void)
}
/* [BTRFS_MAX_EXTENT_SIZE][4K] */
- BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
NULL, 0);
@@ -1004,12 +1001,7 @@ static int test_extent_accounting(void)
/*
* [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
- *
- * I'm artificially adding 2 to outstanding_extents because in the
- * buffered IO case we'd add things up as we go, but I don't feel like
- * doing that here, this isn't the interesting case we want to test.
*/
- BTRFS_I(inode)->outstanding_extents += 2;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
(BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
NULL, 0);
@@ -1044,7 +1036,7 @@ static int test_extent_accounting(void)
BTRFS_MAX_EXTENT_SIZE+4096,
BTRFS_MAX_EXTENT_SIZE+8191,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+ EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
@@ -1061,7 +1053,6 @@ static int test_extent_accounting(void)
* Refill the hole again just for good measure, because I thought it
* might fail and I'd rather satisfy my paranoia at this point.
*/
- BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
BTRFS_MAX_EXTENT_SIZE+8191, NULL, 0);
if (ret) {
@@ -1078,7 +1069,7 @@ static int test_extent_accounting(void)
/* Empty */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+ EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
@@ -1095,7 +1086,7 @@ out:
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+ EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS);
iput(inode);
btrfs_free_dummy_root(root);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cb0f86ee374d..28b4631610d7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -499,7 +499,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
qgroup_reserved = num_items * root->nodesize;
- ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved,
+ ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
enforce_qgroups);
if (ret)
return ERR_PTR(ret);
@@ -599,7 +599,7 @@ alloc_fail:
btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
num_bytes);
reserve_fail:
- btrfs_qgroup_free_meta(root, qgroup_reserved);
+ btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
return ERR_PTR(ret);
}
@@ -1270,7 +1270,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->fs_roots_radix_lock);
if (err)
break;
- btrfs_qgroup_free_meta_all(root);
+ btrfs_qgroup_free_meta_all_pertrans(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
@@ -1351,9 +1351,6 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
ret = commit_fs_roots(trans, src);
if (ret)
goto out;
- ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
- if (ret < 0)
- goto out;
ret = btrfs_qgroup_account_extents(trans, fs_info);
if (ret < 0)
goto out;
@@ -2104,13 +2101,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto scrub_continue;
}
- /* Reocrd old roots for later qgroup accounting */
- ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info);
- if (ret) {
- mutex_unlock(&root->fs_info->reloc_mutex);
- goto scrub_continue;
- }
-
/*
* make sure none of the code above managed to slip in a
* delayed item
@@ -2255,6 +2245,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
*/
cur_trans->state = TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
+ clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &root->fs_info->flags);
spin_lock(&root->fs_info->trans_lock);
list_del_init(&cur_trans->list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index b237a8f79239..20c80b3fde7d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -185,6 +185,20 @@ static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
delayed_refs->qgroup_to_skip = 0;
}
+
+/*
+ * Try to commit the transaction asynchronously; this is safe to call
+ * even while holding a spinlock.
+ *
+ * It works by informing transaction_kthread to commit the transaction
+ * without waiting for the commit interval.
+ */
+static inline void btrfs_commit_transaction_locksafe(
+ struct btrfs_fs_info *fs_info)
+{
+ set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
+ wake_up_process(fs_info->transaction_kthread);
+}
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
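Illustration only, not part of the diff: the consumer of
BTRFS_FS_NEED_ASYNC_COMMIT is transaction_kthread(). A simplified,
hypothetical condensation of the decision it makes (the real loop also
handles transaction state and blocked waiters):

static bool example_should_commit_now(struct btrfs_fs_info *fs_info,
				      unsigned long trans_started)
{
	/*
	 * btrfs_commit_transaction_locksafe() only sets this bit and wakes
	 * the kthread, which is why it is spinlock-safe; the kthread then
	 * commits without waiting out the interval.
	 */
	if (test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags))
		return true;

	/* Otherwise honor the normal commit interval (in seconds) */
	return time_after(jiffies,
			  trans_started + fs_info->commit_interval * HZ);
}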
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 839ff0f85745..307570a2937c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -189,14 +189,16 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
xid = get_xid();
- /*
- * PATH_MAX may be too long - it would presumably be total path,
- * but note that some servers (includinng Samba 3) have a shorter
- * maximum path.
- *
- * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO.
- */
- buf->f_namelen = PATH_MAX;
+ if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) > 0)
+ buf->f_namelen =
+ le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength);
+ else
+ buf->f_namelen = PATH_MAX;
+
+ buf->f_fsid.val[0] = tcon->vol_serial_number;
+ /* use part of the volume create time for extra randomness, see statfs(2) */
+ buf->f_fsid.val[1] = (int)le64_to_cpu(tcon->vol_create_time);
+
buf->f_files = 0; /* undefined */
buf->f_ffree = 0; /* unlimited */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 868ae3930fc8..d21d169bb722 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1302,6 +1302,7 @@ typedef int (mid_handle_t)(struct TCP_Server_Info *server,
/* one of these for every pending CIFS request to the server */
struct mid_q_entry {
struct list_head qhead; /* mids waiting on reply from this server */
+ struct kref refcount;
struct TCP_Server_Info *server; /* server corresponding to this mid */
__u64 mid; /* multiplex id */
__u32 pid; /* process id */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4552a4ffc551..bf573b91ee01 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -74,6 +74,7 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
struct TCP_Server_Info *server);
extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
extern void cifs_delete_mid(struct mid_q_entry *mid);
+extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry);
extern void cifs_wake_up_task(struct mid_q_entry *mid);
extern int cifs_handle_standard(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 44ef7d45c0f9..73322d41768c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -883,6 +883,7 @@ cifs_demultiplex_thread(void *p)
continue;
server->total_read += length;
+ mid_entry = NULL;
if (server->ops->is_transform_hdr &&
server->ops->receive_transform &&
server->ops->is_transform_hdr(buf)) {
@@ -897,8 +898,11 @@ cifs_demultiplex_thread(void *p)
length = mid_entry->receive(server, mid_entry);
}
- if (length < 0)
+ if (length < 0) {
+ if (mid_entry)
+ cifs_mid_q_entry_release(mid_entry);
continue;
+ }
if (server->large_buf)
buf = server->bigbuf;
@@ -914,6 +918,8 @@ cifs_demultiplex_thread(void *p)
if (!mid_entry->multiRsp || mid_entry->multiEnd)
mid_entry->callback(mid_entry);
+
+ cifs_mid_q_entry_release(mid_entry);
} else if (server->ops->is_oplock_break &&
server->ops->is_oplock_break(buf, server)) {
cifs_dbg(FYI, "Received oplock break\n");
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index c4255e292f6e..18466d4ceb39 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -105,6 +105,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
if (compare_mid(mid->mid, buf) &&
mid->mid_state == MID_REQUEST_SUBMITTED &&
le16_to_cpu(mid->command) == buf->Command) {
+ kref_get(&mid->refcount);
spin_unlock(&GlobalMid_Lock);
return mid;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e40e6c52a584..3849a285660b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -197,6 +197,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
if ((mid->mid == wire_mid) &&
(mid->mid_state == MID_REQUEST_SUBMITTED) &&
(mid->command == shdr->Command)) {
+ kref_get(&mid->refcount);
spin_unlock(&GlobalMid_Lock);
return mid;
}
@@ -326,6 +327,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_DEVICE_INFORMATION);
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ FS_VOLUME_INFORMATION);
+ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
return;
@@ -981,10 +984,11 @@ smb2_is_session_expired(char *buf)
{
struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
- if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED)
+ if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED &&
+ shdr->Status != STATUS_USER_SESSION_DELETED)
return false;
- cifs_dbg(FYI, "Session expired\n");
+ cifs_dbg(FYI, "Session expired or deleted\n");
return true;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 767c4014b754..fafe70f14d5a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1517,8 +1517,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
&name_len,
tcon->treeName, path);
- if (rc)
+ if (rc) {
+ cifs_small_buf_release(req);
return rc;
+ }
req->NameLength = cpu_to_le16(name_len * 2);
uni_path_len = copy_size;
path = copy_path;
@@ -1529,8 +1531,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
if (uni_path_len % 8 != 0) {
copy_size = roundup(uni_path_len, 8);
copy_path = kzalloc(copy_size, GFP_KERNEL);
- if (!copy_path)
+ if (!copy_path) {
+ cifs_small_buf_release(req);
return -ENOMEM;
+ }
memcpy((char *)copy_path, (const char *)path,
uni_path_len);
uni_path_len = copy_size;
@@ -3131,6 +3135,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
} else if (level == FS_SECTOR_SIZE_INFORMATION) {
max_len = sizeof(struct smb3_fs_ss_info);
min_len = sizeof(struct smb3_fs_ss_info);
+ } else if (level == FS_VOLUME_INFORMATION) {
+ max_len = sizeof(struct smb3_fs_vol_info) + MAX_VOL_LABEL_LEN;
+ min_len = sizeof(struct smb3_fs_vol_info);
} else {
cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level);
return -EINVAL;
@@ -3171,6 +3178,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
tcon->ss_flags = le32_to_cpu(ss_info->Flags);
tcon->perf_sector_size =
le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
+ } else if (level == FS_VOLUME_INFORMATION) {
+ struct smb3_fs_vol_info *vol_info = (struct smb3_fs_vol_info *)
+ (offset + (char *)rsp);
+ tcon->vol_serial_number = vol_info->VolumeSerialNumber;
+ tcon->vol_create_time = vol_info->VolumeCreationTime;
}
qfsattr_exit:
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 12e6508bdd60..6aea50f623eb 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -1104,6 +1104,17 @@ struct smb3_fs_ss_info {
__le32 ByteOffsetForPartitionAlignment;
} __packed;
+/* volume info struct - see MS-FSCC 2.5.9 */
+#define MAX_VOL_LABEL_LEN 32
+struct smb3_fs_vol_info {
+ __le64 VolumeCreationTime;
+ __u32 VolumeSerialNumber;
+ __le32 VolumeLabelLength; /* includes trailing null */
+ __u8 SupportsObjects; /* True if the fs, like NTFS, supports objects */
+ __u8 Reserved;
+ __u8 VolumeLabel[0]; /* variable len */
+} __packed;
+
/* partial list of QUERY INFO levels */
#define FILE_DIRECTORY_INFORMATION 1
#define FILE_FULL_DIRECTORY_INFORMATION 2
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 3bb65c1853ca..d9c185511aae 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -433,7 +433,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__);
return rc;
}
-
+
rc = __cifs_calc_signature(rqst, server, sigptr,
&server->secmech.sdesccmacaes->shash);
@@ -542,6 +542,7 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr,
return temp;
else {
memset(temp, 0, sizeof(struct mid_q_entry));
+ kref_init(&temp->refcount);
temp->mid = le64_to_cpu(shdr->MessageId);
temp->pid = current->pid;
temp->command = shdr->Command; /* Always LE */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index de05e4da2f34..91935a0363ea 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -58,6 +58,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
return temp;
else {
memset(temp, 0, sizeof(struct mid_q_entry));
+ kref_init(&temp->refcount);
temp->mid = get_mid(smb_buffer);
temp->pid = current->pid;
temp->command = cpu_to_le16(smb_buffer->Command);
@@ -80,6 +81,21 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
return temp;
}
+static void _cifs_mid_q_entry_release(struct kref *refcount)
+{
+ struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
+ refcount);
+
+ mempool_free(mid, cifs_mid_poolp);
+}
+
+void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
+{
+ spin_lock(&GlobalMid_Lock);
+ kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
+ spin_unlock(&GlobalMid_Lock);
+}
+
void
DeleteMidQEntry(struct mid_q_entry *midEntry)
{
@@ -108,7 +124,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
}
}
#endif
- mempool_free(midEntry, cifs_mid_poolp);
+ cifs_mid_q_entry_release(midEntry);
}
void
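Reader's note, not part of the diff: with the new kref, a
lookup-use-release discipline applies to mids. A minimal sketch of the
pattern the demultiplex path now follows; example_process_mid() is
hypothetical, find_mid and callback are the existing ops:

static void example_process_mid(struct TCP_Server_Info *server, char *buf)
{
	struct mid_q_entry *mid;

	/* find_mid() now takes a reference (kref_get) under GlobalMid_Lock */
	mid = server->ops->find_mid(server, buf);
	if (!mid)
		return;

	mid->callback(mid);		/* may call DeleteMidQEntry() */
	cifs_mid_q_entry_release(mid);	/* drop the lookup reference */
}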
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 4f7d8f4b1e9a..0dc10914c8f0 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -32,7 +32,8 @@
* into the buffer. The buffer is aligned for any type.
* @priv: Private data, for use by the RNG driver.
* @quality: Estimation of true entropy in RNG's bitstream
- * (per mill).
+ * (in bits of entropy per 1024 bits of input;
+ * valid values: 1 to 1024, or 0 for unknown).
*/
struct hwrng {
const char *name;
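Illustration only, not part of the diff: under the clarified semantics, a
driver advertising roughly half a bit of entropy per output bit would set
quality = 512. A hedged sketch; the "example-rng" name and stub read
callback are hypothetical:

static int example_rng_read(struct hwrng *rng, void *buf, size_t max,
			    bool wait)
{
	return 0;	/* stub: a real driver fills buf, returns byte count */
}

static struct hwrng example_rng = {
	.name    = "example-rng",
	.read    = example_rng_read,
	.quality = 512,	/* ~512 bits of entropy per 1024 bits of input */
};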
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 45b037e18030..d0a044f1f635 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -690,6 +690,7 @@ static inline struct cpumask *irq_get_affinity_mask(int irq)
return d ? d->common->affinity : NULL;
}
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d);
static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
{
return d->common->affinity;
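Usage note, not part of the diff: a hypothetical caller querying where an
interrupt is actually routed might do:

	struct irq_data *d = irq_get_irq_data(irq);
	const struct cpumask *mask = irq_data_get_effective_affinity_mask(d);
	unsigned int cpu = cpumask_first(mask);	/* first CPU the IRQ targets */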
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 2ffd11c0b5d3..84917a781747 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -25,6 +25,7 @@ struct btrfs_work;
struct __btrfs_workqueue;
struct btrfs_qgroup_extent_record;
struct prelim_ref;
+struct btrfs_qgroup;
#define show_ref_type(type) \
__print_symbolic(type, \
@@ -54,6 +55,12 @@ struct prelim_ref;
(obj >= BTRFS_ROOT_TREE_OBJECTID && \
obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
+#define show_qgroup_rsv_type(type) \
+ __print_symbolic(type, \
+ { BTRFS_QGROUP_RSV_DATA, "DATA" }, \
+ { BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" }, \
+ { BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
+
#define BTRFS_GROUP_FLAGS \
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
{ BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \
@@ -1543,6 +1550,123 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert,
TP_ARGS(fs_info, oldref, newref, tree_size)
);
+TRACE_EVENT(qgroup_update_reserve,
+
+ TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
+ s64 diff, int type),
+
+ TP_ARGS(fs_info, qgroup, diff, type),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, qgid )
+ __field( u64, cur_reserved )
+ __field( s64, diff )
+ __field( int, type )
+ ),
+
+ TP_fast_assign_btrfs(fs_info,
+ __entry->qgid = qgroup->qgroupid;
+ __entry->cur_reserved = qgroup->rsv.values[type];
+ __entry->diff = diff;
+ __entry->type = type;
+ ),
+
+ TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
+ __entry->qgid, show_qgroup_rsv_type(__entry->type),
+ __entry->cur_reserved, __entry->diff)
+);
+
+TRACE_EVENT(qgroup_meta_reserve,
+
+ TP_PROTO(struct btrfs_root *root, s64 diff, int type),
+
+ TP_ARGS(root, diff, type),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, refroot )
+ __field( s64, diff )
+ __field( int, type )
+ ),
+
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->refroot = root->objectid;
+ __entry->diff = diff;
+ __entry->type = type;
+ ),
+
+ TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
+ show_root_type(__entry->refroot),
+ show_qgroup_rsv_type(__entry->type), __entry->diff)
+);
+
+TRACE_EVENT(qgroup_meta_convert,
+
+ TP_PROTO(struct btrfs_root *root, s64 diff),
+
+ TP_ARGS(root, diff),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, refroot )
+ __field( s64, diff )
+ ),
+
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->refroot = root->objectid;
+ __entry->diff = diff;
+ ),
+
+ TP_printk_btrfs("refroot=%llu(%s) type=%s->%s diff=%lld",
+ show_root_type(__entry->refroot),
+ show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PREALLOC),
+ show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PERTRANS),
+ __entry->diff)
+);
+
+TRACE_EVENT(qgroup_meta_free_all_pertrans,
+
+ TP_PROTO(struct btrfs_root *root),
+
+ TP_ARGS(root),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, refroot )
+ __field( s64, diff )
+ __field( int, type )
+ ),
+
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->refroot = root->objectid;
+ spin_lock(&root->qgroup_meta_rsv_lock);
+ __entry->diff = -(s64)root->qgroup_meta_rsv_pertrans;
+ spin_unlock(&root->qgroup_meta_rsv_lock);
+ __entry->type = BTRFS_QGROUP_RSV_META_PERTRANS;
+ ),
+
+ TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
+ show_root_type(__entry->refroot),
+ show_qgroup_rsv_type(__entry->type), __entry->diff)
+);
+
+TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
+ TP_PROTO(struct btrfs_root *root, u64 ino, int mod),
+
+ TP_ARGS(root, ino, mod),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, root_objectid )
+ __field( u64, ino )
+ __field( int, mod )
+ ),
+
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->root_objectid = root->objectid;
+ __entry->ino = ino;
+ __entry->mod = mod;
+ ),
+
+ TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
+ show_root_type(__entry->root_objectid),
+ (unsigned long long)__entry->ino, __entry->mod)
+);
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
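For orientation, not part of the diff: the new tracepoints are fired from
fs/btrfs/qgroup.c. A hedged sketch of a call site, where
example_trace_reserve() and its num_bytes argument are placeholders:

static void example_trace_reserve(struct btrfs_fs_info *fs_info,
				  struct btrfs_qgroup *qgroup,
				  struct btrfs_root *root, u64 num_bytes)
{
	/* diff is positive on reserve and negative on free */
	trace_qgroup_update_reserve(fs_info, qgroup, (s64)num_bytes,
				    BTRFS_QGROUP_RSV_META_PREALLOC);
	trace_qgroup_meta_reserve(root, (s64)num_bytes,
				  BTRFS_QGROUP_RSV_META_PREALLOC);
}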
diff --git a/kernel/resource.c b/kernel/resource.c
index 698c871aef58..123d34899852 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -343,7 +343,7 @@ int release_resource(struct resource *old)
EXPORT_SYMBOL(release_resource);
/*
- * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
+ * Finds the lowest iomem resource existing within [res->start..res->end].
* the caller must specify res->start, res->end, res->flags and "name".
* If found, returns 0, res is overwritten, if not found, returns -1.
* This walks through whole tree and not just first level children
@@ -376,7 +376,7 @@ static int find_next_iomem_res(struct resource *res, char *name,
p = NULL;
break;
}
- if ((p->end >= start) && (p->start < end))
+ if ((p->end >= start) && (p->start <= end))
break;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 20ff146167d4..78eadcebf7c1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1627,6 +1627,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
+
+ flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
spin_unlock(old_ptl);
}
out:
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index cc6cd938e8be..f73b9205992e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -639,6 +639,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ cond_resched();
ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
map_offset = 0;
if (ret)
diff --git a/mm/mremap.c b/mm/mremap.c
index fe7b7f65f4f4..c8daa308c92c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -96,6 +96,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
+ unsigned long len = old_end - old_addr;
/*
* When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
@@ -149,6 +150,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
}
arch_leave_lazy_mmu_mode();
+ flush_tlb_range(vma, old_end - len, old_end);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
@@ -168,7 +170,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
{
unsigned long extent, next, old_end;
pmd_t *old_pmd, *new_pmd;
- bool need_flush = false;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -206,12 +207,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (need_rmap_locks)
anon_vma_unlock_write(vma->anon_vma);
}
- if (err > 0) {
- need_flush = true;
+ if (err > 0)
continue;
- } else if (!err) {
+ else if (!err)
split_huge_page_pmd(vma, old_addr, old_pmd);
- }
VM_BUG_ON(pmd_trans_huge(*old_pmd));
}
if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma,
@@ -224,10 +223,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
extent = LATENCY_LIMIT;
move_ptes(vma, old_pmd, old_addr, old_addr + extent,
new_vma, new_pmd, new_addr, need_rmap_locks);
- need_flush = true;
}
- if (likely(need_flush))
- flush_tlb_range(vma, old_end-len, old_addr);
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a6f639808be2..84b57e81a671 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2673,6 +2673,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
break;
case ETHTOOL_GPHYSTATS:
rc = ethtool_get_phy_stats(dev, useraddr);
+ break;
case ETHTOOL_PERQUEUE:
rc = ethtool_set_per_queue(dev, useraddr);
break;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 2cee1cb5c47a..9cedbb739e3a 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -100,6 +100,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
iterate_fd(p->files, 0, update_classid_sock,
(void *)(unsigned long)cs->classid);
task_unlock(p);
+ cond_resched();
}
css_task_iter_end(&it);
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index e4d90e50f6fe..be7344623192 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -138,6 +138,9 @@ int main(void)
DEVID(hv_vmbus_device_id);
DEVID_FIELD(hv_vmbus_device_id, guid);
+ DEVID(rpmsg_device_id);
+ DEVID_FIELD(rpmsg_device_id, name);
+
DEVID(i2c_device_id);
DEVID_FIELD(i2c_device_id, name);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 1be42e3c3406..8afdc46bf5f3 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -926,6 +926,17 @@ static int do_vmbus_entry(const char *filename, void *symval,
}
ADD_TO_DEVTABLE("vmbus", hv_vmbus_device_id, do_vmbus_entry);
+/* Looks like: rpmsg:S */
+static int do_rpmsg_entry(const char *filename, void *symval,
+ char *alias)
+{
+ DEF_FIELD_ADDR(symval, rpmsg_device_id, name);
+ sprintf(alias, RPMSG_DEVICE_MODALIAS_FMT, *name);
+
+ return 1;
+}
+ADD_TO_DEVTABLE("rpmsg", rpmsg_device_id, do_rpmsg_entry);
+
/* Looks like: i2c:S */
static int do_i2c_entry(const char *filename, void *symval,
char *alias)