diff options
| author | Yu Kuai <yukuai@fnnas.com> | 2026-02-14 13:43:50 +0800 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2026-02-16 10:47:25 -0700 |
| commit | dfe48ea179733be948c432f6af2fc3913cf5dd28 (patch) | |
| tree | 5b931d881352843e79ab7d7509f4750dfe6d0284 /block | |
| parent | 3678a334a55e869b413e2fb4824e92200b149d73 (diff) | |
blk-mq: use NOIO context to prevent deadlock during debugfs creation
Creating debugfs entries can trigger fs reclaim, which can enter back
into the block layer request_queue. This can cause deadlock if the
queue is frozen.
Previously, a WARN_ON_ONCE check was used in debugfs_create_files()
to detect this condition, but it was racy since the queue can be frozen
from another context at any time.
Introduce blk_debugfs_lock()/blk_debugfs_unlock() helpers that combine
the debugfs_mutex with memalloc_noio_save()/restore() to prevent fs
reclaim from triggering block I/O. Also add blk_debugfs_lock_nomemsave()
and blk_debugfs_unlock_nomemrestore() variants for callers that don't
need NOIO protection (e.g., debugfs removal or read-only operations).
Replace all raw debugfs_mutex lock/unlock pairs with these helpers,
using the _nomemsave/_nomemrestore variants where appropriate.
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs9gNKEYAPagD9JADfO5UH+OiCr4P7OO2wjpfOYeM-RV=A@mail.gmail.com/
Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Closes: https://lore.kernel.org/all/aYWQR7CtYdk3K39g@shinmob/
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
| -rw-r--r-- | block/blk-mq-debugfs.c | 10 | ||||
| -rw-r--r-- | block/blk-mq-sched.c | 9 | ||||
| -rw-r--r-- | block/blk-sysfs.c | 9 | ||||
| -rw-r--r-- | block/blk-wbt.c | 10 | ||||
| -rw-r--r-- | block/blk.h | 31 |
5 files changed, 50 insertions, 19 deletions
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index faeaa1fc86a7..28167c9baa55 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -614,11 +614,6 @@ static void debugfs_create_files(struct request_queue *q, struct dentry *parent, { lockdep_assert_held(&q->debugfs_mutex); /* - * Creating new debugfs entries with queue freezed has the risk of - * deadlock. - */ - WARN_ON_ONCE(q->mq_freeze_depth != 0); - /* * debugfs_mutex should not be nested under other locks that can be * grabbed while queue is frozen. */ @@ -693,12 +688,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) void blk_mq_debugfs_register_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; + unsigned int memflags; unsigned long i; - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_register_hctx(q, hctx); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); } void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index e26898128a7e..97c3c8f45a9b 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -390,13 +390,14 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla void blk_mq_sched_reg_debugfs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; + unsigned int memflags; unsigned long i; - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); blk_mq_debugfs_register_sched(q); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_register_sched_hctx(q, hctx); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); } void blk_mq_sched_unreg_debugfs(struct request_queue *q) @@ -404,11 +405,11 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - mutex_lock(&q->debugfs_mutex); + blk_debugfs_lock_nomemsave(q); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_unregister_sched_hctx(hctx); blk_mq_debugfs_unregister_sched(q); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock_nomemrestore(q); } void blk_mq_free_sched_tags(struct elevator_tags *et, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 003aa684e854..f3b1968c80ce 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -892,13 +892,13 @@ static void blk_debugfs_remove(struct gendisk *disk) { struct request_queue *q = disk->queue; - mutex_lock(&q->debugfs_mutex); + blk_debugfs_lock_nomemsave(q); blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); q->debugfs_dir = NULL; q->sched_debugfs_dir = NULL; q->rqos_debugfs_dir = NULL; - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock_nomemrestore(q); } /** @@ -908,6 +908,7 @@ static void blk_debugfs_remove(struct gendisk *disk) int blk_register_queue(struct gendisk *disk) { struct request_queue *q = disk->queue; + unsigned int memflags; int ret; ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); @@ -921,11 +922,11 @@ int blk_register_queue(struct gendisk *disk) } mutex_lock(&q->sysfs_lock); - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root); if (queue_is_mq(q)) blk_mq_debugfs_register(q); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); ret = disk_register_independent_access_ranges(disk); if (ret) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 1415f2bf8611..6dba71e87387 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -776,6 +776,7 @@ void wbt_init_enable_default(struct gendisk *disk) { struct request_queue *q = disk->queue; struct rq_wb *rwb; + unsigned int memflags; if (!__wbt_enable_default(disk)) return; @@ -789,9 +790,9 @@ void wbt_init_enable_default(struct gendisk *disk) return; } - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); blk_mq_debugfs_register_rq_qos(q); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); } static u64 wbt_default_latency_nsec(struct request_queue *q) @@ -1015,9 +1016,10 @@ int wbt_set_lat(struct gendisk *disk, s64 val) blk_mq_unquiesce_queue(q); out: blk_mq_unfreeze_queue(q, memflags); - mutex_lock(&q->debugfs_mutex); + + memflags = blk_debugfs_lock(q); blk_mq_debugfs_register_rq_qos(q); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); return ret; } diff --git a/block/blk.h b/block/blk.h index a6b1de509733..f6053e9dd2aa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -729,4 +729,35 @@ static inline void blk_unfreeze_release_lock(struct request_queue *q) } #endif +/* + * debugfs directory and file creation can trigger fs reclaim, which can enter + * back into the block layer request_queue. This can cause deadlock if the + * queue is frozen. Use NOIO context together with debugfs_mutex to prevent fs + * reclaim from triggering block I/O. + */ +static inline void blk_debugfs_lock_nomemsave(struct request_queue *q) +{ + mutex_lock(&q->debugfs_mutex); +} + +static inline void blk_debugfs_unlock_nomemrestore(struct request_queue *q) +{ + mutex_unlock(&q->debugfs_mutex); +} + +static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q) +{ + unsigned int memflags = memalloc_noio_save(); + + blk_debugfs_lock_nomemsave(q); + return memflags; +} + +static inline void blk_debugfs_unlock(struct request_queue *q, + unsigned int memflags) +{ + blk_debugfs_unlock_nomemrestore(q); + memalloc_noio_restore(memflags); +} + #endif /* BLK_INTERNAL_H */ |
