diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-03-27 12:22:45 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-03-27 12:22:45 -0700 |
| commit | 3577cfd738e29b3d54cdb10c45a56730346dfe8b (patch) | |
| tree | e4e48a452d8afd8fd9ca3622f2de6773a912aeed | |
| parent | 34892992d0ed45b4b0547f25e01887b56959fd5f (diff) | |
| parent | e31c53a8060e134111ed095783fee0aa0c43b080 (diff) | |
Merge tag 'xfs-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino:
"This includes a few important bug fixes, and some code refactoring
that was necessary for one of the fixes"
* tag 'xfs-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: remove file_path tracepoint data
xfs: don't irele after failing to iget in xfs_attri_recover_work
xfs: remove redundant validation in xlog_recover_attri_commit_pass2
xfs: fix ri_total validation in xlog_recover_attri_commit_pass2
xfs: close crash window in attr dabtree inactivation
xfs: factor out xfs_attr3_leaf_init
xfs: factor out xfs_attr3_node_entry_remove
xfs: only assert new size for datafork during truncate extents
xfs: annotate struct xfs_attr_list_context with __counted_by_ptr
xfs: cleanup buftarg handling in XFS_IOC_VERIFY_MEDIA
xfs: scrub: unlock dquot before early return in quota scrub
xfs: refactor xfsaild_push loop into helper
xfs: save ailp before dropping the AIL lock in push callbacks
xfs: avoid dereferencing log items after push callbacks
xfs: stop reclaim before pushing AIL during unmount
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr.h | 3 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c | 22 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.h | 3 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.c | 53 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.h | 2 | ||||
| -rw-r--r-- | fs/xfs/scrub/quota.c | 4 | ||||
| -rw-r--r-- | fs/xfs/scrub/trace.h | 12 | ||||
| -rw-r--r-- | fs/xfs/xfs_attr_inactive.c | 95 | ||||
| -rw-r--r-- | fs/xfs/xfs_attr_item.c | 51 | ||||
| -rw-r--r-- | fs/xfs/xfs_dquot_item.c | 9 | ||||
| -rw-r--r-- | fs/xfs/xfs_handle.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.c | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode_item.c | 9 | ||||
| -rw-r--r-- | fs/xfs/xfs_mount.c | 7 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.h | 47 | ||||
| -rw-r--r-- | fs/xfs/xfs_trans_ail.c | 127 | ||||
| -rw-r--r-- | fs/xfs/xfs_verify_media.c | 18 | ||||
| -rw-r--r-- | fs/xfs/xfs_xattr.c | 2 |
18 files changed, 274 insertions, 195 deletions
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 8244305949de..67fd9c75ac3f 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -55,7 +55,8 @@ struct xfs_attr_list_context { struct xfs_trans *tp; struct xfs_inode *dp; /* inode */ struct xfs_attrlist_cursor_kern cursor; /* position in list */ - void *buffer; /* output buffer */ + /* output buffer */ + void *buffer __counted_by_ptr(bufsize); /* * Abort attribute list iteration if non-zero. Can be used to pass diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 47f48ae555c0..2b78041e8672 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -1416,6 +1416,28 @@ xfs_attr3_leaf_create( } /* + * Reinitialize an existing attr fork block as an empty leaf, and attach + * the buffer to tp. + */ +int +xfs_attr3_leaf_init( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t blkno) +{ + struct xfs_buf *bp = NULL; + struct xfs_da_args args = { + .trans = tp, + .dp = dp, + .owner = dp->i_ino, + .geo = dp->i_mount->m_attr_geo, + }; + + ASSERT(tp != NULL); + + return xfs_attr3_leaf_create(&args, blkno, &bp); +} +/* * Split the leaf node, rebalance, then add the new entry. * * Returns 0 if the entry was added, 1 if a further split is needed or a diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index aca46da2bc50..72639efe6ac3 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -87,6 +87,9 @@ int xfs_attr3_leaf_list_int(struct xfs_buf *bp, /* * Routines used for shrinking the Btree. */ + +int xfs_attr3_leaf_init(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t blkno); int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval); void xfs_attr3_leaf_unbalance(struct xfs_da_state *state, struct xfs_da_state_blk *drop_blk, diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 09d4c17b3e7b..ad801b7bd2dd 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -1506,21 +1506,20 @@ xfs_da3_fixhashpath( } /* - * Remove an entry from an intermediate node. + * Internal implementation to remove an entry from an intermediate node. */ STATIC void -xfs_da3_node_remove( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk) +__xfs_da3_node_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_da_geometry *geo, + struct xfs_da_state_blk *drop_blk) { struct xfs_da_intnode *node; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_node_entry *btree; int index; int tmp; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_remove(state->args); node = drop_blk->bp->b_addr; xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); @@ -1536,17 +1535,17 @@ xfs_da3_node_remove( tmp = nodehdr.count - index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); memmove(&btree[index], &btree[index + 1], tmp); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], tmp)); index = nodehdr.count - 1; } memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); nodehdr.count -= 1; xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size)); + xfs_trans_log_buf(tp, drop_blk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, geo->node_hdr_size)); /* * Copy the last hash value from the block to propagate upwards. @@ -1555,6 +1554,38 @@ xfs_da3_node_remove( } /* + * Remove an entry from an intermediate node. + */ +STATIC void +xfs_da3_node_remove( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk) +{ + trace_xfs_da_node_remove(state->args); + + __xfs_da3_node_remove(state->args->trans, state->args->dp, + state->args->geo, drop_blk); +} + +/* + * Remove an entry from an intermediate attr node at the specified index. + */ +void +xfs_attr3_node_entry_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_buf *bp, + int index) +{ + struct xfs_da_state_blk blk = { + .index = index, + .bp = bp, + }; + + __xfs_da3_node_remove(tp, dp, dp->i_mount->m_attr_geo, &blk); +} + +/* * Unbalance the elements between two intermediate nodes, * move all Btree elements from one node into another. */ diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 354d5d65043e..afcf2d3c7a21 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -184,6 +184,8 @@ int xfs_da3_split(xfs_da_state_t *state); int xfs_da3_join(xfs_da_state_t *state); void xfs_da3_fixhashpath(struct xfs_da_state *state, struct xfs_da_state_path *path_to_to_fix); +void xfs_attr3_node_entry_remove(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, int index); /* * Routines used for finding things in the Btree. diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 1d25bd5b892e..222812fe202c 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -171,8 +171,10 @@ xchk_quota_item( error = xchk_quota_item_bmap(sc, dq, offset); xchk_iunlock(sc, XFS_ILOCK_SHARED); - if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) { + mutex_unlock(&dq->q_qlock); return error; + } /* * Warn if the hard limits are larger than the fs. diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 39ea651cbb75..286c5f5e0544 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -972,20 +972,12 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; - __entry->ino = file_inode(xf->file)->i_ino; - path = file_path(xf->file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("xfino 0x%lx path '%s'", - __entry->ino, - __entry->pathname) + TP_printk("xfino 0x%lx", + __entry->ino) ); TRACE_EVENT(xfile_destroy, diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 92331991f9fd..a5b69c0fbfd0 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -140,7 +140,7 @@ xfs_attr3_node_inactive( xfs_daddr_t parent_blkno, child_blkno; struct xfs_buf *child_bp; struct xfs_da3_icnode_hdr ichdr; - int error, i; + int error; /* * Since this code is recursive (gasp!) we must protect ourselves. @@ -152,7 +152,7 @@ xfs_attr3_node_inactive( return -EFSCORRUPTED; } - xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr); + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); parent_blkno = xfs_buf_daddr(bp); if (!ichdr.count) { xfs_trans_brelse(*trans, bp); @@ -167,7 +167,7 @@ xfs_attr3_node_inactive( * over the leaves removing all of them. If this is higher up * in the tree, recurse downward. */ - for (i = 0; i < ichdr.count; i++) { + while (ichdr.count > 0) { /* * Read the subsidiary block to see what we have to work with. * Don't do this in a transaction. This is a depth-first @@ -218,29 +218,32 @@ xfs_attr3_node_inactive( xfs_trans_binval(*trans, child_bp); child_bp = NULL; + error = xfs_da3_node_read_mapped(*trans, dp, + parent_blkno, &bp, XFS_ATTR_FORK); + if (error) + return error; + /* - * If we're not done, re-read the parent to get the next - * child block number. + * Remove entry from parent node, prevents being indexed to. */ - if (i + 1 < ichdr.count) { - struct xfs_da3_icnode_hdr phdr; + xfs_attr3_node_entry_remove(*trans, dp, bp, 0); + + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); + bp = NULL; - error = xfs_da3_node_read_mapped(*trans, dp, - parent_blkno, &bp, XFS_ATTR_FORK); + if (ichdr.count > 0) { + /* + * If we're not done, get the next child block number. + */ + child_fsb = be32_to_cpu(ichdr.btree[0].before); + + /* + * Atomically commit the whole invalidate stuff. + */ + error = xfs_trans_roll_inode(trans, dp); if (error) return error; - xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr, - bp->b_addr); - child_fsb = be32_to_cpu(phdr.btree[i + 1].before); - xfs_trans_brelse(*trans, bp); - bp = NULL; } - /* - * Atomically commit the whole invalidate stuff. - */ - error = xfs_trans_roll_inode(trans, dp); - if (error) - return error; } return 0; @@ -257,10 +260,8 @@ xfs_attr3_root_inactive( struct xfs_trans **trans, struct xfs_inode *dp) { - struct xfs_mount *mp = dp->i_mount; struct xfs_da_blkinfo *info; struct xfs_buf *bp; - xfs_daddr_t blkno; int error; /* @@ -272,7 +273,6 @@ xfs_attr3_root_inactive( error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK); if (error) return error; - blkno = xfs_buf_daddr(bp); /* * Invalidate the tree, even if the "tree" is only a single leaf block. @@ -283,10 +283,26 @@ xfs_attr3_root_inactive( case cpu_to_be16(XFS_DA_NODE_MAGIC): case cpu_to_be16(XFS_DA3_NODE_MAGIC): error = xfs_attr3_node_inactive(trans, dp, bp, 1); + /* + * Empty root node block are not allowed, convert it to leaf. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): error = xfs_attr3_leaf_inactive(trans, dp, bp); + /* + * Reinit the leaf before truncating extents so that a crash + * mid-truncation leaves an empty leaf rather than one with + * entries that may reference freed remote value blocks. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; default: xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); @@ -295,21 +311,6 @@ xfs_attr3_root_inactive( xfs_trans_brelse(*trans, bp); break; } - if (error) - return error; - - /* - * Invalidate the incore copy of the root block. - */ - error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, - XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp); - if (error) - return error; - xfs_trans_binval(*trans, bp); /* remove from cache */ - /* - * Commit the invalidate and start the next transaction. - */ - error = xfs_trans_roll_inode(trans, dp); return error; } @@ -328,6 +329,7 @@ xfs_attr_inactive( { struct xfs_trans *trans; struct xfs_mount *mp; + struct xfs_buf *bp; int lock_mode = XFS_ILOCK_SHARED; int error = 0; @@ -363,10 +365,27 @@ xfs_attr_inactive( * removal below. */ if (dp->i_af.if_nextents > 0) { + /* + * Invalidate and truncate all blocks but leave the root block. + */ error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, + XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount)); + if (error) + goto out_cancel; + + /* + * Invalidate and truncate the root block and ensure that the + * operation is completed within a single transaction. + */ + error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK); + if (error) + goto out_cancel; + + xfs_trans_binval(trans, bp); error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); if (error) goto out_cancel; diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 354472bf45f1..deab14f31b38 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -653,7 +653,6 @@ xfs_attri_recover_work( break; } if (error) { - xfs_irele(ip); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp, sizeof(*attrp)); return ERR_PTR(-EFSCORRUPTED); @@ -1047,8 +1046,8 @@ xlog_recover_attri_commit_pass2( break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: - /* Log item, attr name, attr value */ - if (item->ri_total != 3) { + /* Log item, attr name, optional attr value */ + if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; @@ -1132,52 +1131,6 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } - switch (op) { - case XFS_ATTRI_OP_FLAGS_REMOVE: - /* Regular remove operations operate only on names. */ - if (attr_value != NULL || value_len != 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - fallthrough; - case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: - case XFS_ATTRI_OP_FLAGS_PPTR_SET: - case XFS_ATTRI_OP_FLAGS_SET: - case XFS_ATTRI_OP_FLAGS_REPLACE: - /* - * Regular xattr set/remove/replace operations require a name - * and do not take a newname. Values are optional for set and - * replace. - * - * Name-value set/remove operations must have a name, do not - * take a newname, and can take a value. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: - /* - * Name-value replace operations require the caller to - * specify the old and new names and values explicitly. - * Values are optional. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - if (attr_new_name == NULL || new_name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - } - /* * Memory alloc failure will cause replay to abort. We attach the * name/value buffer to the recovered incore log item and drop our diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 491e2a7053a3..65a0e69c3d08 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -125,6 +125,7 @@ xfs_qm_dquot_logitem_push( struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_buf *bp; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -153,7 +154,7 @@ xfs_qm_dquot_logitem_push( goto out_unlock; } - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error == -EAGAIN) { @@ -172,9 +173,13 @@ xfs_qm_dquot_logitem_push( rval = XFS_ITEM_FLUSHING; } xfs_buf_relse(bp); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ out_relock_ail: - spin_lock(&lip->li_ailp->ail_lock); + spin_lock(&ailp->ail_lock); out_unlock: mutex_unlock(&dqp->q_qlock); return rval; diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index d1291ca15239..2b8617ae7ec2 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -443,8 +443,8 @@ xfs_ioc_attr_list( context.dp = dp; context.resynch = 1; context.attr_filter = xfs_attr_filter(flags); - context.buffer = buffer; context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.buffer = buffer; context.firstu = context.bufsize; context.put_listent = xfs_ioc_attr_put_listent; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 50c0404f9064..beaa26ec62da 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1048,7 +1048,8 @@ xfs_itruncate_extents_flags( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (icount_read(VFS_I(ip))) xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); - ASSERT(new_size <= XFS_ISIZE(ip)); + if (whichfork == XFS_DATA_FORK) + ASSERT(new_size <= XFS_ISIZE(ip)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 8913036b8024..4ae81eed0442 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -746,6 +746,7 @@ xfs_inode_item_push( struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp = lip->li_buf; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -771,7 +772,7 @@ xfs_inode_item_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); /* * We need to hold a reference for flushing the cluster buffer as it may @@ -795,7 +796,11 @@ xfs_inode_item_push( rval = XFS_ITEM_LOCKED; } - spin_lock(&lip->li_ailp->ail_lock); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ + spin_lock(&ailp->ail_lock); return rval; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9c295abd0a0a..ef1ea8a1238c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -608,8 +608,9 @@ xfs_unmount_check( * have been retrying in the background. This will prevent never-ending * retries in AIL pushing from hanging the unmount. * - * Finally, we can push the AIL to clean all the remaining dirty objects, then - * reclaim the remaining inodes that are still in memory at this point in time. + * Stop inodegc and background reclaim before pushing the AIL so that they + * are not running while the AIL is being flushed. Then push the AIL to + * clean all the remaining dirty objects and reclaim the remaining inodes. */ static void xfs_unmount_flush_inodes( @@ -621,9 +622,9 @@ xfs_unmount_flush_inodes( xfs_set_unmounting(mp); - xfs_ail_push_all_sync(mp->m_ail); xfs_inodegc_stop(mp); cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_ail_push_all_sync(mp->m_ail); xfs_reclaim_inodes(mp); xfs_health_unmount(mp); xfs_healthmon_unmount(mp); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 813e5a9f57eb..5e8190fe2be9 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -56,6 +56,7 @@ #include <linux/tracepoint.h> struct xfs_agf; +struct xfs_ail; struct xfs_alloc_arg; struct xfs_attr_list_context; struct xfs_buf_log_item; @@ -1650,16 +1651,43 @@ TRACE_EVENT(xfs_log_force, DEFINE_EVENT(xfs_log_item_class, name, \ TP_PROTO(struct xfs_log_item *lip), \ TP_ARGS(lip)) -DEFINE_LOG_ITEM_EVENT(xfs_ail_push); -DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); -DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); -DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); +DECLARE_EVENT_CLASS(xfs_ail_push_class, + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), + TP_ARGS(ailp, type, flags, lsn), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(uint, type) + __field(unsigned long, flags) + __field(xfs_lsn_t, lsn) + ), + TP_fast_assign( + __entry->dev = ailp->ail_log->l_mp->m_super->s_dev; + __entry->type = type; + __entry->flags = flags; + __entry->lsn = lsn; + ), + TP_printk("dev %d:%d lsn %d/%d type %s flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_AIL_PUSH_EVENT(name) \ +DEFINE_EVENT(xfs_ail_push_class, name, \ + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), \ + TP_ARGS(ailp, type, flags, lsn)) +DEFINE_AIL_PUSH_EVENT(xfs_ail_push); +DEFINE_AIL_PUSH_EVENT(xfs_ail_pinned); +DEFINE_AIL_PUSH_EVENT(xfs_ail_locked); +DEFINE_AIL_PUSH_EVENT(xfs_ail_flushing); + DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), TP_ARGS(lip, old_lsn, new_lsn), @@ -5091,23 +5119,16 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - path = file_path(file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("dev %d:%d xmino 0x%lx path '%s'", + TP_printk("dev %d:%d xmino 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->pathname) + __entry->ino) ); TRACE_EVENT(xmbuf_free, diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 923729af4206..99a9bf3762b7 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -365,6 +365,12 @@ xfsaild_resubmit_item( return XFS_ITEM_SUCCESS; } +/* + * Push a single log item from the AIL. + * + * @lip may have been released and freed by the time this function returns, + * so callers must not dereference the log item afterwards. + */ static inline uint xfsaild_push_item( struct xfs_ail *ailp, @@ -458,6 +464,74 @@ xfs_ail_calc_push_target( return target_lsn; } +static void +xfsaild_process_logitem( + struct xfs_ail *ailp, + struct xfs_log_item *lip, + int *stuck, + int *flushing) +{ + struct xfs_mount *mp = ailp->ail_log->l_mp; + uint type = lip->li_type; + unsigned long flags = lip->li_flags; + xfs_lsn_t item_lsn = lip->li_lsn; + int lock_result; + + /* + * Note that iop_push may unlock and reacquire the AIL lock. We + * rely on the AIL cursor implementation to be able to deal with + * the dropped lock. + * + * The log item may have been freed by the push, so it must not + * be accessed or dereferenced below this line. + */ + lock_result = xfsaild_push_item(ailp, lip); + switch (lock_result) { + case XFS_ITEM_SUCCESS: + XFS_STATS_INC(mp, xs_push_ail_success); + trace_xfs_ail_push(ailp, type, flags, item_lsn); + + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_FLUSHING: + /* + * The item or its backing buffer is already being + * flushed. The typical reason for that is that an + * inode buffer is locked because we already pushed the + * updates to it as part of inode clustering. + * + * We do not want to stop flushing just because lots + * of items are already being flushed, but we need to + * re-try the flushing relatively soon if most of the + * AIL is being flushed. + */ + XFS_STATS_INC(mp, xs_push_ail_flushing); + trace_xfs_ail_flushing(ailp, type, flags, item_lsn); + + (*flushing)++; + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_PINNED: + XFS_STATS_INC(mp, xs_push_ail_pinned); + trace_xfs_ail_pinned(ailp, type, flags, item_lsn); + + (*stuck)++; + ailp->ail_log_flush++; + break; + case XFS_ITEM_LOCKED: + XFS_STATS_INC(mp, xs_push_ail_locked); + trace_xfs_ail_locked(ailp, type, flags, item_lsn); + + (*stuck)++; + break; + default: + ASSERT(0); + break; + } +} + static long xfsaild_push( struct xfs_ail *ailp) @@ -505,62 +579,11 @@ xfsaild_push( lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { - int lock_result; if (test_bit(XFS_LI_FLUSHING, &lip->li_flags)) goto next_item; - /* - * Note that iop_push may unlock and reacquire the AIL lock. We - * rely on the AIL cursor implementation to be able to deal with - * the dropped lock. - */ - lock_result = xfsaild_push_item(ailp, lip); - switch (lock_result) { - case XFS_ITEM_SUCCESS: - XFS_STATS_INC(mp, xs_push_ail_success); - trace_xfs_ail_push(lip); - - ailp->ail_last_pushed_lsn = lsn; - break; - - case XFS_ITEM_FLUSHING: - /* - * The item or its backing buffer is already being - * flushed. The typical reason for that is that an - * inode buffer is locked because we already pushed the - * updates to it as part of inode clustering. - * - * We do not want to stop flushing just because lots - * of items are already being flushed, but we need to - * re-try the flushing relatively soon if most of the - * AIL is being flushed. - */ - XFS_STATS_INC(mp, xs_push_ail_flushing); - trace_xfs_ail_flushing(lip); - - flushing++; - ailp->ail_last_pushed_lsn = lsn; - break; - - case XFS_ITEM_PINNED: - XFS_STATS_INC(mp, xs_push_ail_pinned); - trace_xfs_ail_pinned(lip); - - stuck++; - ailp->ail_log_flush++; - break; - case XFS_ITEM_LOCKED: - XFS_STATS_INC(mp, xs_push_ail_locked); - trace_xfs_ail_locked(lip); - - stuck++; - break; - default: - ASSERT(0); - break; - } - + xfsaild_process_logitem(ailp, lip, &stuck, &flushing); count++; /* diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c index 8bbd4ec567f8..5ead3976d511 100644 --- a/fs/xfs/xfs_verify_media.c +++ b/fs/xfs/xfs_verify_media.c @@ -183,10 +183,9 @@ xfs_verify_iosize( min_not_zero(SZ_1M, me->me_max_io_size); BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT); - ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev)); + ASSERT(BBTOB(bbcount) >= btp->bt_logical_sectorsize); - return clamp(iosize, bdev_logical_block_size(btp->bt_bdev), - BBTOB(bbcount)); + return clamp(iosize, btp->bt_logical_sectorsize, BBTOB(bbcount)); } /* Allocate as much memory as we can get for verification buffer. */ @@ -218,8 +217,8 @@ xfs_verify_media_error( unsigned int bio_bbcount, blk_status_t bio_status) { - trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr, - bio_bbcount, bio_status); + trace_xfs_verify_media_error(mp, me, btp->bt_dev, daddr, bio_bbcount, + bio_status); /* * Pass any error, I/O or otherwise, up to the caller if we didn't @@ -280,7 +279,7 @@ xfs_verify_media( btp = mp->m_ddev_targp; break; case XFS_DEV_LOG: - if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) + if (mp->m_logdev_targp != mp->m_ddev_targp) btp = mp->m_logdev_targp; break; case XFS_DEV_RT: @@ -299,7 +298,7 @@ xfs_verify_media( /* start and end have to be aligned to the lba size */ if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr), - bdev_logical_block_size(btp->bt_bdev))) + btp->bt_logical_sectorsize)) return -EINVAL; /* @@ -331,8 +330,7 @@ xfs_verify_media( if (!folio) return -ENOMEM; - trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount, - folio); + trace_xfs_verify_media(mp, me, btp->bt_dev, daddr, bbcount, folio); bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL); if (!bio) { @@ -400,7 +398,7 @@ out_folio: * an operational error. */ me->me_start_daddr = daddr; - trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev); + trace_xfs_verify_media_end(mp, me, btp->bt_dev); return 0; } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index a735f16d9cd8..544213067d59 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -332,8 +332,8 @@ xfs_vn_listxattr( memset(&context, 0, sizeof(context)); context.dp = XFS_I(inode); context.resynch = 1; - context.buffer = size ? data : NULL; context.bufsize = size; + context.buffer = size ? data : NULL; context.firstu = context.bufsize; context.put_listent = xfs_xattr_put_listent; |
