summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-03-27 12:22:45 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-03-27 12:22:45 -0700
commit3577cfd738e29b3d54cdb10c45a56730346dfe8b (patch)
treee4e48a452d8afd8fd9ca3622f2de6773a912aeed
parent34892992d0ed45b4b0547f25e01887b56959fd5f (diff)
parente31c53a8060e134111ed095783fee0aa0c43b080 (diff)
Merge tag 'xfs-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino: "This includes a few important bug fixes, and some code refactoring that was necessary for one of the fixes" * tag 'xfs-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: remove file_path tracepoint data xfs: don't irele after failing to iget in xfs_attri_recover_work xfs: remove redundant validation in xlog_recover_attri_commit_pass2 xfs: fix ri_total validation in xlog_recover_attri_commit_pass2 xfs: close crash window in attr dabtree inactivation xfs: factor out xfs_attr3_leaf_init xfs: factor out xfs_attr3_node_entry_remove xfs: only assert new size for datafork during truncate extents xfs: annotate struct xfs_attr_list_context with __counted_by_ptr xfs: cleanup buftarg handling in XFS_IOC_VERIFY_MEDIA xfs: scrub: unlock dquot before early return in quota scrub xfs: refactor xfsaild_push loop into helper xfs: save ailp before dropping the AIL lock in push callbacks xfs: avoid dereferencing log items after push callbacks xfs: stop reclaim before pushing AIL during unmount
-rw-r--r--fs/xfs/libxfs/xfs_attr.h3
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c22
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h3
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c53
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h2
-rw-r--r--fs/xfs/scrub/quota.c4
-rw-r--r--fs/xfs/scrub/trace.h12
-rw-r--r--fs/xfs/xfs_attr_inactive.c95
-rw-r--r--fs/xfs/xfs_attr_item.c51
-rw-r--r--fs/xfs/xfs_dquot_item.c9
-rw-r--r--fs/xfs/xfs_handle.c2
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode_item.c9
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_trace.h47
-rw-r--r--fs/xfs/xfs_trans_ail.c127
-rw-r--r--fs/xfs/xfs_verify_media.c18
-rw-r--r--fs/xfs/xfs_xattr.c2
18 files changed, 274 insertions, 195 deletions
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 8244305949de..67fd9c75ac3f 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -55,7 +55,8 @@ struct xfs_attr_list_context {
struct xfs_trans *tp;
struct xfs_inode *dp; /* inode */
struct xfs_attrlist_cursor_kern cursor; /* position in list */
- void *buffer; /* output buffer */
+ /* output buffer */
+ void *buffer __counted_by_ptr(bufsize);
/*
* Abort attribute list iteration if non-zero. Can be used to pass
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 47f48ae555c0..2b78041e8672 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1416,6 +1416,28 @@ xfs_attr3_leaf_create(
}
/*
+ * Reinitialize an existing attr fork block as an empty leaf, and attach
+ * the buffer to tp.
+ */
+int
+xfs_attr3_leaf_init(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ xfs_dablk_t blkno)
+{
+ struct xfs_buf *bp = NULL;
+ struct xfs_da_args args = {
+ .trans = tp,
+ .dp = dp,
+ .owner = dp->i_ino,
+ .geo = dp->i_mount->m_attr_geo,
+ };
+
+ ASSERT(tp != NULL);
+
+ return xfs_attr3_leaf_create(&args, blkno, &bp);
+}
+/*
* Split the leaf node, rebalance, then add the new entry.
*
* Returns 0 if the entry was added, 1 if a further split is needed or a
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index aca46da2bc50..72639efe6ac3 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -87,6 +87,9 @@ int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
/*
* Routines used for shrinking the Btree.
*/
+
+int xfs_attr3_leaf_init(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t blkno);
int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval);
void xfs_attr3_leaf_unbalance(struct xfs_da_state *state,
struct xfs_da_state_blk *drop_blk,
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 09d4c17b3e7b..ad801b7bd2dd 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1506,21 +1506,20 @@ xfs_da3_fixhashpath(
}
/*
- * Remove an entry from an intermediate node.
+ * Internal implementation to remove an entry from an intermediate node.
*/
STATIC void
-xfs_da3_node_remove(
- struct xfs_da_state *state,
- struct xfs_da_state_blk *drop_blk)
+__xfs_da3_node_remove(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_da_geometry *geo,
+ struct xfs_da_state_blk *drop_blk)
{
struct xfs_da_intnode *node;
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_da_node_entry *btree;
int index;
int tmp;
- struct xfs_inode *dp = state->args->dp;
-
- trace_xfs_da_node_remove(state->args);
node = drop_blk->bp->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
@@ -1536,17 +1535,17 @@ xfs_da3_node_remove(
tmp = nodehdr.count - index - 1;
tmp *= (uint)sizeof(xfs_da_node_entry_t);
memmove(&btree[index], &btree[index + 1], tmp);
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(tp, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], tmp));
index = nodehdr.count - 1;
}
memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(tp, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
nodehdr.count -= 1;
xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
- XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
+ xfs_trans_log_buf(tp, drop_blk->bp,
+ XFS_DA_LOGRANGE(node, &node->hdr, geo->node_hdr_size));
/*
* Copy the last hash value from the block to propagate upwards.
@@ -1555,6 +1554,38 @@ xfs_da3_node_remove(
}
/*
+ * Remove an entry from an intermediate node.
+ */
+STATIC void
+xfs_da3_node_remove(
+ struct xfs_da_state *state,
+ struct xfs_da_state_blk *drop_blk)
+{
+ trace_xfs_da_node_remove(state->args);
+
+ __xfs_da3_node_remove(state->args->trans, state->args->dp,
+ state->args->geo, drop_blk);
+}
+
+/*
+ * Remove an entry from an intermediate attr node at the specified index.
+ */
+void
+xfs_attr3_node_entry_remove(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_buf *bp,
+ int index)
+{
+ struct xfs_da_state_blk blk = {
+ .index = index,
+ .bp = bp,
+ };
+
+ __xfs_da3_node_remove(tp, dp, dp->i_mount->m_attr_geo, &blk);
+}
+
+/*
* Unbalance the elements between two intermediate nodes,
* move all Btree elements from one node into another.
*/
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 354d5d65043e..afcf2d3c7a21 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -184,6 +184,8 @@ int xfs_da3_split(xfs_da_state_t *state);
int xfs_da3_join(xfs_da_state_t *state);
void xfs_da3_fixhashpath(struct xfs_da_state *state,
struct xfs_da_state_path *path_to_to_fix);
+void xfs_attr3_node_entry_remove(struct xfs_trans *tp, struct xfs_inode *dp,
+ struct xfs_buf *bp, int index);
/*
* Routines used for finding things in the Btree.
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 1d25bd5b892e..222812fe202c 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -171,8 +171,10 @@ xchk_quota_item(
error = xchk_quota_item_bmap(sc, dq, offset);
xchk_iunlock(sc, XFS_ILOCK_SHARED);
- if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
+ if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) {
+ mutex_unlock(&dq->q_qlock);
return error;
+ }
/*
* Warn if the hard limits are larger than the fs.
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 39ea651cbb75..286c5f5e0544 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -972,20 +972,12 @@ TRACE_EVENT(xfile_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char *path;
-
__entry->ino = file_inode(xf->file)->i_ino;
- path = file_path(xf->file, __entry->pathname, MAXNAMELEN);
- if (IS_ERR(path))
- strncpy(__entry->pathname, "(unknown)",
- sizeof(__entry->pathname));
),
- TP_printk("xfino 0x%lx path '%s'",
- __entry->ino,
- __entry->pathname)
+ TP_printk("xfino 0x%lx",
+ __entry->ino)
);
TRACE_EVENT(xfile_destroy,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 92331991f9fd..a5b69c0fbfd0 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -140,7 +140,7 @@ xfs_attr3_node_inactive(
xfs_daddr_t parent_blkno, child_blkno;
struct xfs_buf *child_bp;
struct xfs_da3_icnode_hdr ichdr;
- int error, i;
+ int error;
/*
* Since this code is recursive (gasp!) we must protect ourselves.
@@ -152,7 +152,7 @@ xfs_attr3_node_inactive(
return -EFSCORRUPTED;
}
- xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr);
+ xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr);
parent_blkno = xfs_buf_daddr(bp);
if (!ichdr.count) {
xfs_trans_brelse(*trans, bp);
@@ -167,7 +167,7 @@ xfs_attr3_node_inactive(
* over the leaves removing all of them. If this is higher up
* in the tree, recurse downward.
*/
- for (i = 0; i < ichdr.count; i++) {
+ while (ichdr.count > 0) {
/*
* Read the subsidiary block to see what we have to work with.
* Don't do this in a transaction. This is a depth-first
@@ -218,29 +218,32 @@ xfs_attr3_node_inactive(
xfs_trans_binval(*trans, child_bp);
child_bp = NULL;
+ error = xfs_da3_node_read_mapped(*trans, dp,
+ parent_blkno, &bp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
/*
- * If we're not done, re-read the parent to get the next
- * child block number.
+ * Remove entry from parent node, prevents being indexed to.
*/
- if (i + 1 < ichdr.count) {
- struct xfs_da3_icnode_hdr phdr;
+ xfs_attr3_node_entry_remove(*trans, dp, bp, 0);
+
+ xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr);
+ bp = NULL;
- error = xfs_da3_node_read_mapped(*trans, dp,
- parent_blkno, &bp, XFS_ATTR_FORK);
+ if (ichdr.count > 0) {
+ /*
+ * If we're not done, get the next child block number.
+ */
+ child_fsb = be32_to_cpu(ichdr.btree[0].before);
+
+ /*
+ * Atomically commit the whole invalidate stuff.
+ */
+ error = xfs_trans_roll_inode(trans, dp);
if (error)
return error;
- xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr,
- bp->b_addr);
- child_fsb = be32_to_cpu(phdr.btree[i + 1].before);
- xfs_trans_brelse(*trans, bp);
- bp = NULL;
}
- /*
- * Atomically commit the whole invalidate stuff.
- */
- error = xfs_trans_roll_inode(trans, dp);
- if (error)
- return error;
}
return 0;
@@ -257,10 +260,8 @@ xfs_attr3_root_inactive(
struct xfs_trans **trans,
struct xfs_inode *dp)
{
- struct xfs_mount *mp = dp->i_mount;
struct xfs_da_blkinfo *info;
struct xfs_buf *bp;
- xfs_daddr_t blkno;
int error;
/*
@@ -272,7 +273,6 @@ xfs_attr3_root_inactive(
error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK);
if (error)
return error;
- blkno = xfs_buf_daddr(bp);
/*
* Invalidate the tree, even if the "tree" is only a single leaf block.
@@ -283,10 +283,26 @@ xfs_attr3_root_inactive(
case cpu_to_be16(XFS_DA_NODE_MAGIC):
case cpu_to_be16(XFS_DA3_NODE_MAGIC):
error = xfs_attr3_node_inactive(trans, dp, bp, 1);
+		/*
+		 * An empty root node block is not allowed; convert it to a leaf.
+		 */
+ if (!error)
+ error = xfs_attr3_leaf_init(*trans, dp, 0);
+ if (!error)
+ error = xfs_trans_roll_inode(trans, dp);
break;
case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
error = xfs_attr3_leaf_inactive(trans, dp, bp);
+ /*
+ * Reinit the leaf before truncating extents so that a crash
+ * mid-truncation leaves an empty leaf rather than one with
+ * entries that may reference freed remote value blocks.
+ */
+ if (!error)
+ error = xfs_attr3_leaf_init(*trans, dp, 0);
+ if (!error)
+ error = xfs_trans_roll_inode(trans, dp);
break;
default:
xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
@@ -295,21 +311,6 @@ xfs_attr3_root_inactive(
xfs_trans_brelse(*trans, bp);
break;
}
- if (error)
- return error;
-
- /*
- * Invalidate the incore copy of the root block.
- */
- error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
- XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp);
- if (error)
- return error;
- xfs_trans_binval(*trans, bp); /* remove from cache */
- /*
- * Commit the invalidate and start the next transaction.
- */
- error = xfs_trans_roll_inode(trans, dp);
return error;
}
@@ -328,6 +329,7 @@ xfs_attr_inactive(
{
struct xfs_trans *trans;
struct xfs_mount *mp;
+ struct xfs_buf *bp;
int lock_mode = XFS_ILOCK_SHARED;
int error = 0;
@@ -363,10 +365,27 @@ xfs_attr_inactive(
* removal below.
*/
if (dp->i_af.if_nextents > 0) {
+ /*
+ * Invalidate and truncate all blocks but leave the root block.
+ */
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK,
+ XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount));
+ if (error)
+ goto out_cancel;
+
+ /*
+ * Invalidate and truncate the root block and ensure that the
+ * operation is completed within a single transaction.
+ */
+ error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK);
+ if (error)
+ goto out_cancel;
+
+ xfs_trans_binval(trans, bp);
error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
if (error)
goto out_cancel;
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 354472bf45f1..deab14f31b38 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -653,7 +653,6 @@ xfs_attri_recover_work(
break;
}
if (error) {
- xfs_irele(ip);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp,
sizeof(*attrp));
return ERR_PTR(-EFSCORRUPTED);
@@ -1047,8 +1046,8 @@ xlog_recover_attri_commit_pass2(
break;
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
- /* Log item, attr name, attr value */
- if (item->ri_total != 3) {
+ /* Log item, attr name, optional attr value */
+ if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
return -EFSCORRUPTED;
@@ -1132,52 +1131,6 @@ xlog_recover_attri_commit_pass2(
return -EFSCORRUPTED;
}
- switch (op) {
- case XFS_ATTRI_OP_FLAGS_REMOVE:
- /* Regular remove operations operate only on names. */
- if (attr_value != NULL || value_len != 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- fallthrough;
- case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
- case XFS_ATTRI_OP_FLAGS_PPTR_SET:
- case XFS_ATTRI_OP_FLAGS_SET:
- case XFS_ATTRI_OP_FLAGS_REPLACE:
- /*
- * Regular xattr set/remove/replace operations require a name
- * and do not take a newname. Values are optional for set and
- * replace.
- *
- * Name-value set/remove operations must have a name, do not
- * take a newname, and can take a value.
- */
- if (attr_name == NULL || name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- break;
- case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
- /*
- * Name-value replace operations require the caller to
- * specify the old and new names and values explicitly.
- * Values are optional.
- */
- if (attr_name == NULL || name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- if (attr_new_name == NULL || new_name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- break;
- }
-
/*
* Memory alloc failure will cause replay to abort. We attach the
* name/value buffer to the recovered incore log item and drop our
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 491e2a7053a3..65a0e69c3d08 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -125,6 +125,7 @@ xfs_qm_dquot_logitem_push(
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
struct xfs_dquot *dqp = qlip->qli_dquot;
struct xfs_buf *bp;
+ struct xfs_ail *ailp = lip->li_ailp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -153,7 +154,7 @@ xfs_qm_dquot_logitem_push(
goto out_unlock;
}
- spin_unlock(&lip->li_ailp->ail_lock);
+ spin_unlock(&ailp->ail_lock);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error == -EAGAIN) {
@@ -172,9 +173,13 @@ xfs_qm_dquot_logitem_push(
rval = XFS_ITEM_FLUSHING;
}
xfs_buf_relse(bp);
+ /*
+ * The buffer no longer protects the log item from reclaim, so
+ * do not reference lip after this point.
+ */
out_relock_ail:
- spin_lock(&lip->li_ailp->ail_lock);
+ spin_lock(&ailp->ail_lock);
out_unlock:
mutex_unlock(&dqp->q_qlock);
return rval;
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index d1291ca15239..2b8617ae7ec2 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -443,8 +443,8 @@ xfs_ioc_attr_list(
context.dp = dp;
context.resynch = 1;
context.attr_filter = xfs_attr_filter(flags);
- context.buffer = buffer;
context.bufsize = round_down(bufsize, sizeof(uint32_t));
+ context.buffer = buffer;
context.firstu = context.bufsize;
context.put_listent = xfs_ioc_attr_put_listent;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 50c0404f9064..beaa26ec62da 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1048,7 +1048,8 @@ xfs_itruncate_extents_flags(
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
if (icount_read(VFS_I(ip)))
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
- ASSERT(new_size <= XFS_ISIZE(ip));
+ if (whichfork == XFS_DATA_FORK)
+ ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 8913036b8024..4ae81eed0442 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -746,6 +746,7 @@ xfs_inode_item_push(
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
struct xfs_buf *bp = lip->li_buf;
+ struct xfs_ail *ailp = lip->li_ailp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -771,7 +772,7 @@ xfs_inode_item_push(
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
- spin_unlock(&lip->li_ailp->ail_lock);
+ spin_unlock(&ailp->ail_lock);
/*
* We need to hold a reference for flushing the cluster buffer as it may
@@ -795,7 +796,11 @@ xfs_inode_item_push(
rval = XFS_ITEM_LOCKED;
}
- spin_lock(&lip->li_ailp->ail_lock);
+ /*
+ * The buffer no longer protects the log item from reclaim, so
+ * do not reference lip after this point.
+ */
+ spin_lock(&ailp->ail_lock);
return rval;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9c295abd0a0a..ef1ea8a1238c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -608,8 +608,9 @@ xfs_unmount_check(
* have been retrying in the background. This will prevent never-ending
* retries in AIL pushing from hanging the unmount.
*
- * Finally, we can push the AIL to clean all the remaining dirty objects, then
- * reclaim the remaining inodes that are still in memory at this point in time.
+ * Stop inodegc and background reclaim before pushing the AIL so that they
+ * are not running while the AIL is being flushed. Then push the AIL to
+ * clean all the remaining dirty objects and reclaim the remaining inodes.
*/
static void
xfs_unmount_flush_inodes(
@@ -621,9 +622,9 @@ xfs_unmount_flush_inodes(
xfs_set_unmounting(mp);
- xfs_ail_push_all_sync(mp->m_ail);
xfs_inodegc_stop(mp);
cancel_delayed_work_sync(&mp->m_reclaim_work);
+ xfs_ail_push_all_sync(mp->m_ail);
xfs_reclaim_inodes(mp);
xfs_health_unmount(mp);
xfs_healthmon_unmount(mp);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 813e5a9f57eb..5e8190fe2be9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -56,6 +56,7 @@
#include <linux/tracepoint.h>
struct xfs_agf;
+struct xfs_ail;
struct xfs_alloc_arg;
struct xfs_attr_list_context;
struct xfs_buf_log_item;
@@ -1650,16 +1651,43 @@ TRACE_EVENT(xfs_log_force,
DEFINE_EVENT(xfs_log_item_class, name, \
TP_PROTO(struct xfs_log_item *lip), \
TP_ARGS(lip))
-DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort);
DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort);
+DECLARE_EVENT_CLASS(xfs_ail_push_class,
+ TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn),
+ TP_ARGS(ailp, type, flags, lsn),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(uint, type)
+ __field(unsigned long, flags)
+ __field(xfs_lsn_t, lsn)
+ ),
+ TP_fast_assign(
+ __entry->dev = ailp->ail_log->l_mp->m_super->s_dev;
+ __entry->type = type;
+ __entry->flags = flags;
+ __entry->lsn = lsn;
+ ),
+ TP_printk("dev %d:%d lsn %d/%d type %s flags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn),
+ __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+ __print_flags(__entry->flags, "|", XFS_LI_FLAGS))
+)
+
+#define DEFINE_AIL_PUSH_EVENT(name) \
+DEFINE_EVENT(xfs_ail_push_class, name, \
+ TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), \
+ TP_ARGS(ailp, type, flags, lsn))
+DEFINE_AIL_PUSH_EVENT(xfs_ail_push);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_pinned);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_locked);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_flushing);
+
DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
TP_ARGS(lip, old_lsn, new_lsn),
@@ -5091,23 +5119,16 @@ TRACE_EVENT(xmbuf_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char *path;
struct file *file = btp->bt_file;
__entry->dev = btp->bt_mount->m_super->s_dev;
__entry->ino = file_inode(file)->i_ino;
- path = file_path(file, __entry->pathname, MAXNAMELEN);
- if (IS_ERR(path))
- strncpy(__entry->pathname, "(unknown)",
- sizeof(__entry->pathname));
),
- TP_printk("dev %d:%d xmino 0x%lx path '%s'",
+ TP_printk("dev %d:%d xmino 0x%lx",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->pathname)
+ __entry->ino)
);
TRACE_EVENT(xmbuf_free,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 923729af4206..99a9bf3762b7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -365,6 +365,12 @@ xfsaild_resubmit_item(
return XFS_ITEM_SUCCESS;
}
+/*
+ * Push a single log item from the AIL.
+ *
+ * @lip may have been released and freed by the time this function returns,
+ * so callers must not dereference the log item afterwards.
+ */
static inline uint
xfsaild_push_item(
struct xfs_ail *ailp,
@@ -458,6 +464,74 @@ xfs_ail_calc_push_target(
return target_lsn;
}
+static void
+xfsaild_process_logitem(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+ int *stuck,
+ int *flushing)
+{
+ struct xfs_mount *mp = ailp->ail_log->l_mp;
+ uint type = lip->li_type;
+ unsigned long flags = lip->li_flags;
+ xfs_lsn_t item_lsn = lip->li_lsn;
+ int lock_result;
+
+ /*
+ * Note that iop_push may unlock and reacquire the AIL lock. We
+ * rely on the AIL cursor implementation to be able to deal with
+ * the dropped lock.
+ *
+ * The log item may have been freed by the push, so it must not
+ * be accessed or dereferenced below this line.
+ */
+ lock_result = xfsaild_push_item(ailp, lip);
+ switch (lock_result) {
+ case XFS_ITEM_SUCCESS:
+ XFS_STATS_INC(mp, xs_push_ail_success);
+ trace_xfs_ail_push(ailp, type, flags, item_lsn);
+
+ ailp->ail_last_pushed_lsn = item_lsn;
+ break;
+
+ case XFS_ITEM_FLUSHING:
+ /*
+ * The item or its backing buffer is already being
+ * flushed. The typical reason for that is that an
+ * inode buffer is locked because we already pushed the
+ * updates to it as part of inode clustering.
+ *
+ * We do not want to stop flushing just because lots
+ * of items are already being flushed, but we need to
+ * re-try the flushing relatively soon if most of the
+ * AIL is being flushed.
+ */
+ XFS_STATS_INC(mp, xs_push_ail_flushing);
+ trace_xfs_ail_flushing(ailp, type, flags, item_lsn);
+
+ (*flushing)++;
+ ailp->ail_last_pushed_lsn = item_lsn;
+ break;
+
+ case XFS_ITEM_PINNED:
+ XFS_STATS_INC(mp, xs_push_ail_pinned);
+ trace_xfs_ail_pinned(ailp, type, flags, item_lsn);
+
+ (*stuck)++;
+ ailp->ail_log_flush++;
+ break;
+ case XFS_ITEM_LOCKED:
+ XFS_STATS_INC(mp, xs_push_ail_locked);
+ trace_xfs_ail_locked(ailp, type, flags, item_lsn);
+
+ (*stuck)++;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+}
+
static long
xfsaild_push(
struct xfs_ail *ailp)
@@ -505,62 +579,11 @@ xfsaild_push(
lsn = lip->li_lsn;
while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) {
- int lock_result;
if (test_bit(XFS_LI_FLUSHING, &lip->li_flags))
goto next_item;
- /*
- * Note that iop_push may unlock and reacquire the AIL lock. We
- * rely on the AIL cursor implementation to be able to deal with
- * the dropped lock.
- */
- lock_result = xfsaild_push_item(ailp, lip);
- switch (lock_result) {
- case XFS_ITEM_SUCCESS:
- XFS_STATS_INC(mp, xs_push_ail_success);
- trace_xfs_ail_push(lip);
-
- ailp->ail_last_pushed_lsn = lsn;
- break;
-
- case XFS_ITEM_FLUSHING:
- /*
- * The item or its backing buffer is already being
- * flushed. The typical reason for that is that an
- * inode buffer is locked because we already pushed the
- * updates to it as part of inode clustering.
- *
- * We do not want to stop flushing just because lots
- * of items are already being flushed, but we need to
- * re-try the flushing relatively soon if most of the
- * AIL is being flushed.
- */
- XFS_STATS_INC(mp, xs_push_ail_flushing);
- trace_xfs_ail_flushing(lip);
-
- flushing++;
- ailp->ail_last_pushed_lsn = lsn;
- break;
-
- case XFS_ITEM_PINNED:
- XFS_STATS_INC(mp, xs_push_ail_pinned);
- trace_xfs_ail_pinned(lip);
-
- stuck++;
- ailp->ail_log_flush++;
- break;
- case XFS_ITEM_LOCKED:
- XFS_STATS_INC(mp, xs_push_ail_locked);
- trace_xfs_ail_locked(lip);
-
- stuck++;
- break;
- default:
- ASSERT(0);
- break;
- }
-
+ xfsaild_process_logitem(ailp, lip, &stuck, &flushing);
count++;
/*
diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c
index 8bbd4ec567f8..5ead3976d511 100644
--- a/fs/xfs/xfs_verify_media.c
+++ b/fs/xfs/xfs_verify_media.c
@@ -183,10 +183,9 @@ xfs_verify_iosize(
min_not_zero(SZ_1M, me->me_max_io_size);
BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT);
- ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev));
+ ASSERT(BBTOB(bbcount) >= btp->bt_logical_sectorsize);
- return clamp(iosize, bdev_logical_block_size(btp->bt_bdev),
- BBTOB(bbcount));
+ return clamp(iosize, btp->bt_logical_sectorsize, BBTOB(bbcount));
}
/* Allocate as much memory as we can get for verification buffer. */
@@ -218,8 +217,8 @@ xfs_verify_media_error(
unsigned int bio_bbcount,
blk_status_t bio_status)
{
- trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr,
- bio_bbcount, bio_status);
+ trace_xfs_verify_media_error(mp, me, btp->bt_dev, daddr, bio_bbcount,
+ bio_status);
/*
* Pass any error, I/O or otherwise, up to the caller if we didn't
@@ -280,7 +279,7 @@ xfs_verify_media(
btp = mp->m_ddev_targp;
break;
case XFS_DEV_LOG:
- if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev)
+ if (mp->m_logdev_targp != mp->m_ddev_targp)
btp = mp->m_logdev_targp;
break;
case XFS_DEV_RT:
@@ -299,7 +298,7 @@ xfs_verify_media(
/* start and end have to be aligned to the lba size */
if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr),
- bdev_logical_block_size(btp->bt_bdev)))
+ btp->bt_logical_sectorsize))
return -EINVAL;
/*
@@ -331,8 +330,7 @@ xfs_verify_media(
if (!folio)
return -ENOMEM;
- trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount,
- folio);
+ trace_xfs_verify_media(mp, me, btp->bt_dev, daddr, bbcount, folio);
bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL);
if (!bio) {
@@ -400,7 +398,7 @@ out_folio:
* an operational error.
*/
me->me_start_daddr = daddr;
- trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev);
+ trace_xfs_verify_media_end(mp, me, btp->bt_dev);
return 0;
}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index a735f16d9cd8..544213067d59 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -332,8 +332,8 @@ xfs_vn_listxattr(
memset(&context, 0, sizeof(context));
context.dp = XFS_I(inode);
context.resynch = 1;
- context.buffer = size ? data : NULL;
context.bufsize = size;
+ context.buffer = size ? data : NULL;
context.firstu = context.bufsize;
context.put_listent = xfs_xattr_put_listent;