summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-02-17 08:48:45 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-02-17 08:48:45 -0800
commit: 99dfe2d4da67d863ff8f185d1e8033cce28e4c49 (patch)
tree: a5afa6d4923bf972103f10c176d2c65d0f1ba9a1 /block
parent: 7b751b01ade7f666de2f5c365bd9562c2dcd7d60 (diff)
parent: dfe48ea179733be948c432f6af2fc3913cf5dd28 (diff)
Merge tag 'block-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull more block updates from Jens Axboe:

 - Fix partial IOVA mapping cleanup in error handling
 - Minor prep series ignoring discard return value, as the inline value is always known
 - Ensure BLK_FEAT_STABLE_WRITES is set for drbd
 - Fix leak of folio in bio_iov_iter_bounce_read()
 - Allow IOC_PR_READ_* for read-only open
 - Another debugfs deadlock fix
 - A few doc updates

* tag 'block-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  blk-mq: use NOIO context to prevent deadlock during debugfs creation
  blk-stat: convert struct blk_stat_callback to kernel-doc
  block: fix enum descriptions kernel-doc
  block: update docs for bio and bvec_iter
  block: change return type to void
  nvmet: ignore discard return value
  md: ignore discard return value
  block: fix partial IOVA mapping cleanup in blk_rq_dma_map_iova
  block: fix folio leak in bio_iov_iter_bounce_read()
  block: allow IOC_PR_READ_* ioctls with BLK_OPEN_READ
  drbd: always set BLK_FEAT_STABLE_WRITES
Diffstat (limited to 'block')
-rw-r--r-- block/bio.c 4
-rw-r--r-- block/blk-lib.c 3
-rw-r--r-- block/blk-mq-debugfs.c 10
-rw-r--r-- block/blk-mq-dma.c 13
-rw-r--r-- block/blk-mq-sched.c 9
-rw-r--r-- block/blk-stat.h 9
-rw-r--r-- block/blk-sysfs.c 9
-rw-r--r-- block/blk-wbt.c 10
-rw-r--r-- block/blk.h 31
-rw-r--r-- block/ioctl.c 34
10 files changed, 92 insertions, 40 deletions
diff --git a/block/bio.c b/block/bio.c
index b291b9aaeee1..8203bb7455a9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1382,8 +1382,10 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec + 1, len,
&bio->bi_vcnt, bio->bi_max_vecs - 1, 0);
if (ret <= 0) {
- if (!bio->bi_vcnt)
+ if (!bio->bi_vcnt) {
+ folio_put(folio);
return ret;
+ }
break;
}
len -= ret;
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 0be3acdc3eb5..3213afc7f0d5 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -60,7 +60,7 @@ struct bio *blk_alloc_discard_bio(struct block_device *bdev,
return bio;
}
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+void __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
struct bio *bio;
@@ -68,7 +68,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
gfp_mask)))
*biop = bio_chain_and_submit(*biop, bio);
- return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index faeaa1fc86a7..28167c9baa55 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -614,11 +614,6 @@ static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
{
lockdep_assert_held(&q->debugfs_mutex);
/*
- * Creating new debugfs entries with queue freezed has the risk of
- * deadlock.
- */
- WARN_ON_ONCE(q->mq_freeze_depth != 0);
- /*
* debugfs_mutex should not be nested under other locks that can be
* grabbed while queue is frozen.
*/
@@ -693,12 +688,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
void blk_mq_debugfs_register_hctxs(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
+ unsigned int memflags;
unsigned long i;
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_register_hctx(q, hctx);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
}
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index 3c87779cdc19..bfdb9ed70741 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -121,17 +121,20 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
vec->len, dir, attrs);
if (error)
- break;
+ goto out_unlink;
mapped += vec->len;
} while (blk_map_iter_next(req, &iter->iter, vec));
error = dma_iova_sync(dma_dev, state, 0, mapped);
- if (error) {
- iter->status = errno_to_blk_status(error);
- return false;
- }
+ if (error)
+ goto out_unlink;
return true;
+
+out_unlink:
+ dma_iova_destroy(dma_dev, state, mapped, dir, attrs);
+ iter->status = errno_to_blk_status(error);
+ return false;
}
static inline void blk_rq_map_iter_init(struct request *rq,
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index e26898128a7e..97c3c8f45a9b 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -390,13 +390,14 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla
void blk_mq_sched_reg_debugfs(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
+ unsigned int memflags;
unsigned long i;
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
blk_mq_debugfs_register_sched(q);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_register_sched_hctx(q, hctx);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
}
void blk_mq_sched_unreg_debugfs(struct request_queue *q)
@@ -404,11 +405,11 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q)
struct blk_mq_hw_ctx *hctx;
unsigned long i;
- mutex_lock(&q->debugfs_mutex);
+ blk_debugfs_lock_nomemsave(q);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_unregister_sched_hctx(hctx);
blk_mq_debugfs_unregister_sched(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock_nomemrestore(q);
}
void blk_mq_free_sched_tags(struct elevator_tags *et,
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 9e05bf18d1be..cc5b66e7ee60 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -17,7 +17,7 @@
* timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked.
*/
struct blk_stat_callback {
- /*
+ /**
* @list: RCU list of callbacks for a &struct request_queue.
*/
struct list_head list;
@@ -50,7 +50,7 @@ struct blk_stat_callback {
struct blk_rq_stat *stat;
/**
- * @fn: Callback function.
+ * @timer_fn: Callback function.
*/
void (*timer_fn)(struct blk_stat_callback *);
@@ -59,6 +59,9 @@ struct blk_stat_callback {
*/
void *data;
+ /**
+ * @rcu: rcu list head
+ */
struct rcu_head rcu;
};
@@ -126,6 +129,8 @@ void blk_stat_free_callback(struct blk_stat_callback *cb);
* blk_stat_is_active() - Check if a block statistics callback is currently
* gathering statistics.
* @cb: The callback.
+ *
+ * Returns: %true iff the callback is active.
*/
static inline bool blk_stat_is_active(struct blk_stat_callback *cb)
{
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 003aa684e854..f3b1968c80ce 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -892,13 +892,13 @@ static void blk_debugfs_remove(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
- mutex_lock(&q->debugfs_mutex);
+ blk_debugfs_lock_nomemsave(q);
blk_trace_shutdown(q);
debugfs_remove_recursive(q->debugfs_dir);
q->debugfs_dir = NULL;
q->sched_debugfs_dir = NULL;
q->rqos_debugfs_dir = NULL;
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock_nomemrestore(q);
}
/**
@@ -908,6 +908,7 @@ static void blk_debugfs_remove(struct gendisk *disk)
int blk_register_queue(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
+ unsigned int memflags;
int ret;
ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
@@ -921,11 +922,11 @@ int blk_register_queue(struct gendisk *disk)
}
mutex_lock(&q->sysfs_lock);
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
if (queue_is_mq(q))
blk_mq_debugfs_register(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
ret = disk_register_independent_access_ranges(disk);
if (ret)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 1415f2bf8611..6dba71e87387 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -776,6 +776,7 @@ void wbt_init_enable_default(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct rq_wb *rwb;
+ unsigned int memflags;
if (!__wbt_enable_default(disk))
return;
@@ -789,9 +790,9 @@ void wbt_init_enable_default(struct gendisk *disk)
return;
}
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
blk_mq_debugfs_register_rq_qos(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
}
static u64 wbt_default_latency_nsec(struct request_queue *q)
@@ -1015,9 +1016,10 @@ int wbt_set_lat(struct gendisk *disk, s64 val)
blk_mq_unquiesce_queue(q);
out:
blk_mq_unfreeze_queue(q, memflags);
- mutex_lock(&q->debugfs_mutex);
+
+ memflags = blk_debugfs_lock(q);
blk_mq_debugfs_register_rq_qos(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
return ret;
}
diff --git a/block/blk.h b/block/blk.h
index a6b1de509733..f6053e9dd2aa 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -729,4 +729,35 @@ static inline void blk_unfreeze_release_lock(struct request_queue *q)
}
#endif
+/*
+ * debugfs directory and file creation can trigger fs reclaim, which can enter
+ * back into the block layer request_queue. This can cause deadlock if the
+ * queue is frozen. Use NOIO context together with debugfs_mutex to prevent fs
+ * reclaim from triggering block I/O.
+ */
+static inline void blk_debugfs_lock_nomemsave(struct request_queue *q)
+{
+ mutex_lock(&q->debugfs_mutex);
+}
+
+static inline void blk_debugfs_unlock_nomemrestore(struct request_queue *q)
+{
+ mutex_unlock(&q->debugfs_mutex);
+}
+
+static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q)
+{
+ unsigned int memflags = memalloc_noio_save();
+
+ blk_debugfs_lock_nomemsave(q);
+ return memflags;
+}
+
+static inline void blk_debugfs_unlock(struct request_queue *q,
+ unsigned int memflags)
+{
+ blk_debugfs_unlock_nomemrestore(q);
+ memalloc_noio_restore(memflags);
+}
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/ioctl.c b/block/ioctl.c
index fd48f82f9f03..0b04661ac809 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -318,7 +318,13 @@ int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode,
EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
#endif
-static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode)
+enum pr_direction {
+ PR_IN, /* read from device */
+ PR_OUT, /* write to device */
+};
+
+static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode,
+ enum pr_direction dir)
{
/* no sense to make reservations for partitions */
if (bdev_is_partition(bdev))
@@ -326,11 +332,17 @@ static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode)
if (capable(CAP_SYS_ADMIN))
return true;
+
/*
- * Only allow unprivileged reservations if the file descriptor is open
- * for writing.
+ * Only allow unprivileged reservation _out_ commands if the file
+ * descriptor is open for writing. Allow reservation _in_ commands if
+ * the file descriptor is open for reading since they do not modify the
+ * device.
*/
- return mode & BLK_OPEN_WRITE;
+ if (dir == PR_IN)
+ return mode & BLK_OPEN_READ;
+ else
+ return mode & BLK_OPEN_WRITE;
}
static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
@@ -339,7 +351,7 @@ static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_registration reg;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_register)
return -EOPNOTSUPP;
@@ -357,7 +369,7 @@ static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_reserve)
return -EOPNOTSUPP;
@@ -375,7 +387,7 @@ static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_release)
return -EOPNOTSUPP;
@@ -393,7 +405,7 @@ static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_preempt p;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_preempt)
return -EOPNOTSUPP;
@@ -411,7 +423,7 @@ static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_clear c;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_clear)
return -EOPNOTSUPP;
@@ -434,7 +446,7 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode,
size_t keys_copy_len;
int ret;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_IN))
return -EPERM;
if (!ops || !ops->pr_read_keys)
return -EOPNOTSUPP;
@@ -486,7 +498,7 @@ static int blkdev_pr_read_reservation(struct block_device *bdev,
struct pr_read_reservation out = {};
int ret;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_IN))
return -EPERM;
if (!ops || !ops->pr_read_reservation)
return -EOPNOTSUPP;