summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-12 15:07:50 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-12 15:07:50 -0800
commit041c16acbafbdd8c089cc077c78e060322dde18c (patch)
tree28c0fde3fb949b94f59ddba7f5847c0d6e6d3e88
parente99785a923d585174a71ea9c081bee708184862e (diff)
parent795663b4d160ba652959f1a46381c5e8b1342a53 (diff)
Merge tag 'for-7.0/io_uring-zcrx-large-buffers-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring large rx buffer support from Jens Axboe: "Now that the networking updates are upstream, here's the support for large buffers for zcrx. Using larger (bigger than 4K) rx buffers can increase the efficiency of zcrx. For example, it's been shown that using 32K buffers can decrease CPU usage by ~30% compared to 4K buffers" * tag 'for-7.0/io_uring-zcrx-large-buffers-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: io_uring/zcrx: implement large rx buffer support
-rw-r--r--include/uapi/linux/io_uring.h2
-rw-r--r--io_uring/zcrx.c38
2 files changed, 34 insertions, 6 deletions
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index da5156954731..fc473af6feb4 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1104,7 +1104,7 @@ struct io_uring_zcrx_ifq_reg {
struct io_uring_zcrx_offsets offsets;
__u32 zcrx_id;
- __u32 __resv2;
+ __u32 rx_buf_len;
__u64 __resv[3];
};
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 3d398283cf34..d8b6db456bd7 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -55,6 +55,18 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
return area->mem.pages[net_iov_idx(niov) << niov_pages_shift];
}
+static int io_area_max_shift(struct io_zcrx_mem *mem)
+{
+ struct sg_table *sgt = mem->sgt;
+ struct scatterlist *sg;
+ unsigned shift = -1U;
+ unsigned i;
+
+ for_each_sgtable_dma_sg(sgt, sg, i)
+ shift = min(shift, __ffs(sg->length));
+ return shift;
+}
+
static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
struct io_zcrx_area *area)
{
@@ -417,12 +429,21 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
}
static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
- struct io_uring_zcrx_area_reg *area_reg)
+ struct io_uring_zcrx_area_reg *area_reg,
+ struct io_uring_zcrx_ifq_reg *reg)
{
+ int buf_size_shift = PAGE_SHIFT;
struct io_zcrx_area *area;
unsigned nr_iovs;
int i, ret;
+ if (reg->rx_buf_len) {
+ if (!is_power_of_2(reg->rx_buf_len) ||
+ reg->rx_buf_len < PAGE_SIZE)
+ return -EINVAL;
+ buf_size_shift = ilog2(reg->rx_buf_len);
+ }
+
ret = -ENOMEM;
area = kzalloc(sizeof(*area), GFP_KERNEL);
if (!area)
@@ -433,7 +454,12 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
if (ret)
goto err;
- ifq->niov_shift = PAGE_SHIFT;
+ if (buf_size_shift > io_area_max_shift(&area->mem)) {
+ ret = -ERANGE;
+ goto err;
+ }
+
+ ifq->niov_shift = buf_size_shift;
nr_iovs = area->mem.size >> ifq->niov_shift;
area->nia.num_niovs = nr_iovs;
@@ -743,8 +769,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
return -EINVAL;
if (copy_from_user(&reg, arg, sizeof(reg)))
return -EFAULT;
- if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
- reg.__resv2 || reg.zcrx_id)
+ if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) || reg.zcrx_id)
return -EINVAL;
if (reg.flags & ZCRX_REG_IMPORT)
return import_zcrx(ctx, arg, &reg);
@@ -801,10 +826,11 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
}
get_device(ifq->dev);
- ret = io_zcrx_create_area(ifq, &area);
+ ret = io_zcrx_create_area(ifq, &area, &reg);
if (ret)
goto netdev_put_unlock;
+ mp_param.rx_page_size = 1U << ifq->niov_shift;
mp_param.mp_ops = &io_uring_pp_zc_ops;
mp_param.mp_priv = ifq;
ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
@@ -822,6 +848,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
goto err;
}
+ reg.rx_buf_len = 1U << ifq->niov_shift;
+
if (copy_to_user(arg, &reg, sizeof(reg)) ||
copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {