Home Home > GIT Browse > SLE15-AZURE
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTakashi Iwai <tiwai@suse.de>2019-08-15 13:05:54 +0200
committerTakashi Iwai <tiwai@suse.de>2019-08-15 13:05:54 +0200
commit3e0a7e6b2b9bb829cf747fb1bc4ce1470007e0c6 (patch)
tree478084a370bbaf18aed95872c36f7842b9b868c3
parent6cd0bda4cb0f8c214109a54a6ea8c83c404b03c6 (diff)
parentd49f6c3569acd780e83563d345a658542888a8fb (diff)
Merge branch 'users/nborisov/SLE15/for-next' into SLE15
Pull xfs fixes from Nikolay Borisov suse-commit: 1c726d3fa7197474a18ab7fa8f4eec8862293cbc
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c199
-rw-r--r--fs/xfs/xfs_log.c61
-rw-r--r--fs/xfs/xfs_log_cil.c84
-rw-r--r--fs/xfs/xfs_log_priv.h1
4 files changed, 200 insertions, 145 deletions
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6bd916bd35e2..5f17641f040f 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -34,6 +34,9 @@
#include "xfs_trans_space.h"
#include "xfs_trace.h"
+#define _ALLOC true
+#define _FREE false
+
/*
* A buffer has a format structure overhead in the log in addition
* to the data, so we need to take this into account when reserving
@@ -132,43 +135,77 @@ xfs_calc_inode_res(
}
/*
- * The free inode btree is a conditional feature and the log reservation
- * requirements differ slightly from that of the traditional inode allocation
- * btree. The finobt tracks records for inode chunks with at least one free
- * inode. A record can be removed from the tree for an inode allocation
- * or free and thus the finobt reservation is unconditional across:
+ * Inode btree record insertion/removal modifies the inode btree and free space
+ * btrees (since the inobt does not use the agfl). This requires the following
+ * reservation:
*
- * - inode allocation
- * - inode free
- * - inode chunk allocation
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
*
- * The 'modify' param indicates to include the record modification scenario. The
- * 'alloc' param indicates to include the reservation for free space btree
- * modifications on behalf of finobt modifications. This is required only for
- * transactions that do not already account for free space btree modifications.
+ * The caller must account for SB and AG header modifications, etc.
+ */
+STATIC uint
+xfs_calc_inobt_res(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * The free inode btree is a conditional feature. The behavior differs slightly
+ * from that of the traditional inode btree in that the finobt tracks records
+ * for inode chunks with at least one free inode. A record can be removed from
+ * the tree during individual inode allocation. Therefore the finobt
+ * reservation is unconditional for both the inode chunk allocation and
+ * individual inode allocation (modify) cases.
*
- * the free inode btree: max depth * block size
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the free inode btree entry: block size
+ * Behavior aside, the reservation for finobt modification is equivalent to the
+ * traditional inobt: cover a full finobt shape change plus block allocation.
*/
STATIC uint
xfs_calc_finobt_res(
- struct xfs_mount *mp,
- int alloc,
- int modify)
+ struct xfs_mount *mp)
{
- uint res;
-
if (!xfs_sb_version_hasfinobt(&mp->m_sb))
return 0;
- res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
- if (alloc)
- res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
- if (modify)
- res += (uint)XFS_FSB_TO_B(mp, 1);
+ return xfs_calc_inobt_res(mp);
+}
+/*
+ * Calculate the reservation required to allocate or free an inode chunk. This
+ * includes:
+ *
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the inode chunk: m_ialloc_blks * N
+ *
+ * The size N of the inode chunk reservation depends on whether it is for
+ * allocation or free and which type of create transaction is in use. An inode
+ * chunk free always invalidates the buffers and only requires reservation for
+ * headers (N == 0). An inode chunk allocation requires a chunk sized
+ * reservation on v4 and older superblocks to initialize the chunk. No chunk
+ * reservation is required for allocation on v5 supers, which use ordered
+ * buffers to initialize.
+ */
+STATIC uint
+xfs_calc_inode_chunk_res(
+ struct xfs_mount *mp,
+ bool alloc)
+{
+ uint res, size = 0;
+
+ res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+ if (alloc) {
+ /* icreate tx uses ordered buffers */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ return res;
+ size = XFS_FSB_TO_B(mp, 1);
+ }
+
+ res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
return res;
}
@@ -232,8 +269,6 @@ xfs_calc_write_reservation(
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
@@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation(
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(5, 0) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(2 + mp->m_ialloc_blks +
- mp->m_in_maxlevels, 0)));
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -282,13 +312,14 @@ xfs_calc_rename_reservation(
* For removing an inode from unlinked list at first, we can modify:
* the agi hash list and counters: sector size
* the on disk inode before ours in the agi hash list: inode cluster size
+ * the on disk inode in the agi hash list: inode cluster size
*/
STATIC uint
xfs_calc_iunlink_remove_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+ 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
}
/*
@@ -320,13 +351,13 @@ xfs_calc_link_reservation(
/*
* For adding an inode to unlinked list we can modify:
* the agi hash list: sector size
- * the unlinked inode: inode size
+ * the on disk inode: inode cluster size
*/
STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- xfs_calc_inode_res(mp, 1);
+ max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
}
/*
@@ -379,45 +410,16 @@ xfs_calc_create_resv_modify(
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
(uint)XFS_FSB_TO_B(mp, 1) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 1, 1);
-}
-
-/*
- * For create we can allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the superblock for the nlink flag: sector size
- * the inode blocks allocated: mp->m_ialloc_blks * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_create_resv_alloc(
- struct xfs_mount *mp)
-{
- return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
- mp->m_sb.sb_sectsize +
- xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
-}
-
-STATIC uint
-__xfs_calc_create_reservation(
- struct xfs_mount *mp)
-{
- return XFS_DQUOT_LOGRES(mp) +
- MAX(xfs_calc_create_resv_alloc(mp),
- xfs_calc_create_resv_modify(mp));
+ xfs_calc_finobt_res(mp);
}
/*
* For icreate we can allocate some inodes giving:
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
* the superblock for the nlink flag: sector size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the finobt (record insertion)
+ * the inode chunk (allocation, optional init)
+ * the inobt (record insertion)
+ * the finobt (optional, record insertion)
*/
STATIC uint
xfs_calc_icreate_resv_alloc(
@@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc(
{
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
- xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 0, 0);
+ xfs_calc_inode_chunk_res(mp, _ALLOC) +
+ xfs_calc_inobt_res(mp) +
+ xfs_calc_finobt_res(mp);
}
STATIC uint
@@ -440,26 +441,12 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp)
}
STATIC uint
-xfs_calc_create_reservation(
- struct xfs_mount *mp)
-{
- if (xfs_sb_version_hascrc(&mp->m_sb))
- return xfs_calc_icreate_reservation(mp);
- return __xfs_calc_create_reservation(mp);
-
-}
-
-STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
uint res = XFS_DQUOT_LOGRES(mp);
- if (xfs_sb_version_hascrc(&mp->m_sb))
- res += xfs_calc_icreate_resv_alloc(mp);
- else
- res += xfs_calc_create_resv_alloc(mp);
-
+ res += xfs_calc_icreate_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
}
@@ -470,7 +457,7 @@ STATIC uint
xfs_calc_mkdir_reservation(
struct xfs_mount *mp)
{
- return xfs_calc_create_reservation(mp);
+ return xfs_calc_icreate_reservation(mp);
}
@@ -483,20 +470,24 @@ STATIC uint
xfs_calc_symlink_reservation(
struct xfs_mount *mp)
{
- return xfs_calc_create_reservation(mp) +
+ return xfs_calc_icreate_reservation(mp) +
xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
}
/*
* In freeing an inode we can modify:
* the inode being freed: inode size
- * the super block free inode counter: sector size
- * the agi hash list and counters: sector size
- * the inode btree entry: block size
- * the on disk inode before ours in the agi hash list: inode cluster size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the super block free inode counter, AGF and AGFL: sector size
+ * the on disk inode (agi unlinked list removal)
+ * the inode chunk (invalidated, headers only)
+ * the inode btree
* the finobt (record insertion, removal or modification)
+ *
+ * Note that the inode chunk res. includes an allocfree res. for freeing of the
+ * inode chunk. This is technically extraneous because the inode chunk free is
+ * deferred (it occurs after a transaction roll). Include the extra reservation
+ * anyways since we've had reports of ifree transaction overruns due to too many
+ * agfl fixups during inode chunk frees.
*/
STATIC uint
xfs_calc_ifree_reservation(
@@ -504,15 +495,11 @@ xfs_calc_ifree_reservation(
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_iunlink_remove_reservation(mp) +
- xfs_calc_buf_res(1, 0) +
- xfs_calc_buf_res(2 + mp->m_ialloc_blks +
- mp->m_in_maxlevels, 0) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 0, 1);
+ xfs_calc_inode_chunk_res(mp, _FREE) +
+ xfs_calc_inobt_res(mp) +
+ xfs_calc_finobt_res(mp);
}
/*
@@ -842,7 +829,7 @@ xfs_trans_resv_calc(
resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
- resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
+ resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2841926bdfa9..2847347d7f63 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2087,7 +2087,7 @@ xlog_print_tic_res(
};
#undef REG_TYPE_STR
- xfs_warn(mp, "xlog_write: reservation summary:");
+ xfs_warn(mp, "ticket reservation summary:");
xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res);
xfs_warn(mp, " current res = %d bytes",
@@ -2108,10 +2108,57 @@ xlog_print_tic_res(
"bad-rtype" : res_type_str[r_type]),
ticket->t_res_arr[i].r_len);
}
+}
+
+/*
+ * Print a summary of the transaction.
+ */
+void
+xlog_print_trans(
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_log_item_desc *lidp;
+
+ /* dump core transaction and ticket info */
+ xfs_warn(mp, "transaction summary:");
+ xfs_warn(mp, " log res = %d", tp->t_log_res);
+ xfs_warn(mp, " log count = %d", tp->t_log_count);
+ xfs_warn(mp, " flags = 0x%x", tp->t_flags);
+
+ xlog_print_tic_res(mp, tp->t_ticket);
+
+ /* dump each log item */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+ struct xfs_log_vec *lv = lip->li_lv;
+ struct xfs_log_iovec *vec;
+ int i;
+
+ xfs_warn(mp, "log item: ");
+ xfs_warn(mp, " type = 0x%x", lip->li_type);
+ xfs_warn(mp, " flags = 0x%x", lip->li_flags);
+ if (!lv)
+ continue;
+ xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
+ xfs_warn(mp, " size = %d", lv->lv_size);
+ xfs_warn(mp, " bytes = %d", lv->lv_bytes);
+ xfs_warn(mp, " buf len = %d", lv->lv_buf_len);
- xfs_alert_tag(mp, XFS_PTAG_LOGRES,
- "xlog_write: reservation ran out. Need to up reservation");
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+ /* dump each iovec for the log item */
+ vec = lv->lv_iovecp;
+ for (i = 0; i < lv->lv_niovecs; i++) {
+ int dumplen = min(vec->i_len, 32);
+
+ xfs_warn(mp, " iovec[%d]", i);
+ xfs_warn(mp, " type = 0x%x", vec->i_type);
+ xfs_warn(mp, " len = %d", vec->i_len);
+ xfs_warn(mp, " first %d bytes of iovec[%d]:", dumplen, i);
+ xfs_hex_dump(vec->i_addr, dumplen);
+
+ vec++;
+ }
+ }
}
/*
@@ -2384,8 +2431,12 @@ xlog_write(
if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
ticket->t_curr_res -= sizeof(xlog_op_header_t);
- if (ticket->t_curr_res < 0)
+ if (ticket->t_curr_res < 0) {
+ xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+ "ctx ticket reservation ran out. Need to up reservation");
xlog_print_tic_res(log->l_mp, ticket);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ }
index = 0;
lv = log_vector;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 1f53dc23aebe..950717856ced 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -410,6 +410,7 @@ xlog_cil_insert_items(
int len = 0;
int diff_iovecs = 0;
int iclog_space;
+ int iovhdr_res = 0, split_res = 0, ctx_res = 0;
ASSERT(tp);
@@ -419,30 +420,11 @@ xlog_cil_insert_items(
*/
xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
- /*
- * Now (re-)position everything modified at the tail of the CIL.
- * We do this here so we only need to take the CIL lock once during
- * the transaction commit.
- */
spin_lock(&cil->xc_cil_lock);
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- struct xfs_log_item *lip = lidp->lid_item;
-
- /* Skip items which aren't dirty in this transaction. */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- continue;
-
- /*
- * Only move the item if it isn't already at the tail. This is
- * to prevent a transient list_empty() state when reinserting
- * an item that is already the only item in the CIL.
- */
- if (!list_is_last(&lip->li_cil, &cil->xc_cil))
- list_move_tail(&lip->li_cil, &cil->xc_cil);
- }
/* account for space used by new iovec headers */
- len += diff_iovecs * sizeof(xlog_op_header_t);
+ iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
+ len += iovhdr_res;
ctx->nvecs += diff_iovecs;
/* attach the transaction to the CIL if it has any busy extents */
@@ -457,28 +439,66 @@ xlog_cil_insert_items(
* during the transaction commit.
*/
if (ctx->ticket->t_curr_res == 0) {
- ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
- tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
+ ctx_res = ctx->ticket->t_unit_res;
+ ctx->ticket->t_curr_res = ctx_res;
+ tp->t_ticket->t_curr_res -= ctx_res;
}
/* do we need space for more log record headers? */
iclog_space = log->l_iclog_size - log->l_iclog_hsize;
if (len > 0 && (ctx->space_used / iclog_space !=
(ctx->space_used + len) / iclog_space)) {
- int hdrs;
-
- hdrs = (len + iclog_space - 1) / iclog_space;
+ split_res = (len + iclog_space - 1) / iclog_space;
/* need to take into account split region headers, too */
- hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
- ctx->ticket->t_unit_res += hdrs;
- ctx->ticket->t_curr_res += hdrs;
- tp->t_ticket->t_curr_res -= hdrs;
+ split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+ ctx->ticket->t_unit_res += split_res;
+ ctx->ticket->t_curr_res += split_res;
+ tp->t_ticket->t_curr_res -= split_res;
ASSERT(tp->t_ticket->t_curr_res >= len);
}
tp->t_ticket->t_curr_res -= len;
ctx->space_used += len;
+ /*
+ * If we've overrun the reservation, dump the tx details before we move
+ * the log items. Shutdown is imminent...
+ */
+ if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
+ xfs_warn(log->l_mp, "Transaction log reservation overrun:");
+ xfs_warn(log->l_mp,
+ " log items: %d bytes (iov hdrs: %d bytes)",
+ len, iovhdr_res);
+ xfs_warn(log->l_mp, " split region headers: %d bytes",
+ split_res);
+ xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res);
+ xlog_print_trans(tp);
+ }
+
+ /*
+ * Now (re-)position everything modified at the tail of the CIL.
+ * We do this here so we only need to take the CIL lock once during
+ * the transaction commit.
+ */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
+
+ /*
+ * Only move the item if it isn't already at the tail. This is
+ * to prevent a transient list_empty() state when reinserting
+ * an item that is already the only item in the CIL.
+ */
+ if (!list_is_last(&lip->li_cil, &cil->xc_cil))
+ list_move_tail(&lip->li_cil, &cil->xc_cil);
+ }
+
spin_unlock(&cil->xc_cil_lock);
+
+ if (tp->t_ticket->t_curr_res < 0)
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
}
static void
@@ -987,10 +1007,6 @@ xfs_log_commit_cil(
xlog_cil_insert_items(log, tp);
- /* check we didn't blow the reservation */
- if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(mp, tp->t_ticket);
-
tp->t_commit_lsn = cil->xc_ctx->sequence;
if (commit_lsn)
*commit_lsn = tp->t_commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 1f112815aa0b..ecdc5e446202 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -459,6 +459,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
}
void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+void xlog_print_trans(struct xfs_trans *);
int
xlog_write(
struct xlog *log,