Skip to content

Commit 7b751b0

Browse files
committed
Merge tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull more io_uring updates from Jens Axboe:

"This is a mix of cleanups and fixes. No major fixes in here, just a bunch of little fixes. Some of them marked for stable as it fixes behavioral issues

- Fix an issue with SOCKET_URING_OP_SETSOCKOPT for netlink sockets, due to a too restrictive check on it having an ioctl handler
- Remove a redundant SQPOLL check in ring creation
- Kill dead accounting for zero-copy send, which doesn't use ->buf or ->len post the initial setup
- Fix missing clamp of the allocation hint, which could cause allocations to fall outside of the range the application asked for. Still within the allowed limits.
- Fix for IORING_OP_PIPE's handling of direct descriptors
- Tweak to the API for the newly added BPF filters, making them more future proof in terms of how applications deal with them
- A few fixes for zcrx, fixing a few error handling conditions
- Fix for zcrx request flag checking
- Add support for querying the zcrx page size
- Improve the NO_SQARRAY static branch inc/dec, avoiding busy conditions causing too much traffic
- Various little cleanups"

* tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/bpf_filter: pass in expected filter payload size
  io_uring/bpf_filter: move filter size and populate helper into struct
  io_uring/cancel: de-unionize file and user_data in struct io_cancel_data
  io_uring/rsrc: improve regbuf iov validation
  io_uring: remove unneeded io_send_zc accounting
  io_uring/cmd_net: fix too strict requirement on ioctl
  io_uring: delay sqarray static branch disablement
  io_uring/query: add query.h copyright notice
  io_uring/query: return support for custom rx page size
  io_uring/zcrx: check unsupported flags on import
  io_uring/zcrx: fix post open error handling
  io_uring/zcrx: fix sgtable leak on mapping failures
  io_uring: use the right type for creds iteration
  io_uring/openclose: fix io_pipe_fixed() slot tracking for specific slots
  io_uring/filetable: clamp alloc_hint to the configured alloc range
  io_uring/rsrc: replace reg buffer bit field with flags
  io_uring/zcrx: improve types for size calculation
  io_uring/tctx: avoid modifying loop variable in io_ring_add_registered_file
  io_uring: simplify IORING_SETUP_DEFER_TASKRUN && !SQPOLL check
2 parents 9702969 + be35731 commit 7b751b0

18 files changed

Lines changed: 149 additions & 90 deletions

File tree

include/uapi/linux/io_uring.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,14 @@ enum zcrx_reg_flags {
10901090
ZCRX_REG_IMPORT = 1,
10911091
};
10921092

1093+
enum zcrx_features {
1094+
/*
1095+
* The user can ask for the desired rx page size by passing the
1096+
* value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
1097+
*/
1098+
ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
1099+
};
1100+
10931101
/*
10941102
* Argument for IORING_REGISTER_ZCRX_IFQ
10951103
*/

include/uapi/linux/io_uring/bpf_filter.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,19 @@ enum {
3535
* If set, any currently unset opcode will have a deny filter attached
3636
*/
3737
IO_URING_BPF_FILTER_DENY_REST = 1,
38+
/*
39+
* If set, if kernel and application don't agree on pdu_size for
40+
* the given opcode, fail the registration of the filter.
41+
*/
42+
IO_URING_BPF_FILTER_SZ_STRICT = 2,
3843
};
3944

4045
struct io_uring_bpf_filter {
4146
__u32 opcode; /* io_uring opcode to filter */
4247
__u32 flags;
4348
__u32 filter_len; /* number of BPF instructions */
44-
__u32 resv;
49+
__u8 pdu_size; /* expected pdu size for opcode */
50+
__u8 resv[3];
4551
__u64 filter_ptr; /* pointer to BPF filter */
4652
__u64 resv2[5];
4753
};

include/uapi/linux/io_uring/query.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
22
/*
33
* Header file for the io_uring query interface.
4+
*
5+
* Copyright (C) 2026 Pavel Begunkov <asml.silence@gmail.com>
6+
* Copyright (C) Meta Platforms, Inc.
47
*/
58
#ifndef LINUX_IO_URING_QUERY_H
69
#define LINUX_IO_URING_QUERY_H
@@ -50,7 +53,8 @@ struct io_uring_query_zcrx {
5053
__u64 area_flags;
5154
/* The number of supported ZCRX_CTRL_* opcodes */
5255
__u32 nr_ctrl_opcodes;
53-
__u32 __resv1;
56+
/* Bitmask of ZCRX_FEATURE_* indicating which features are available */
57+
__u32 features;
5458
/* The refill ring header size */
5559
__u32 rq_hdr_size;
5660
/* The alignment for the header */

io_uring/bpf_filter.c

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ static const struct io_bpf_filter dummy_filter;
2626
static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
2727
struct io_kiocb *req)
2828
{
29+
const struct io_issue_def *def = &io_issue_defs[req->opcode];
30+
2931
bctx->opcode = req->opcode;
3032
bctx->sqe_flags = (__force int) req->flags & SQE_VALID_FLAGS;
3133
bctx->user_data = req->cqe.user_data;
@@ -34,19 +36,12 @@ static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
3436
sizeof(*bctx) - offsetof(struct io_uring_bpf_ctx, pdu_size));
3537

3638
/*
37-
* Opcodes can provide a handler fo populating more data into bctx,
39+
* Opcodes can provide a handler for populating more data into bctx,
3840
* for filters to use.
3941
*/
40-
switch (req->opcode) {
41-
case IORING_OP_SOCKET:
42-
bctx->pdu_size = sizeof(bctx->socket);
43-
io_socket_bpf_populate(bctx, req);
44-
break;
45-
case IORING_OP_OPENAT:
46-
case IORING_OP_OPENAT2:
47-
bctx->pdu_size = sizeof(bctx->open);
48-
io_openat_bpf_populate(bctx, req);
49-
break;
42+
if (def->filter_pdu_size) {
43+
bctx->pdu_size = def->filter_pdu_size;
44+
def->filter_populate(bctx, req);
5045
}
5146
}
5247

@@ -313,36 +308,69 @@ static struct io_bpf_filters *io_bpf_filter_cow(struct io_restriction *src)
313308
return ERR_PTR(-EBUSY);
314309
}
315310

316-
#define IO_URING_BPF_FILTER_FLAGS IO_URING_BPF_FILTER_DENY_REST
311+
#define IO_URING_BPF_FILTER_FLAGS (IO_URING_BPF_FILTER_DENY_REST | \
312+
IO_URING_BPF_FILTER_SZ_STRICT)
317313

318-
int io_register_bpf_filter(struct io_restriction *res,
319-
struct io_uring_bpf __user *arg)
314+
static int io_bpf_filter_import(struct io_uring_bpf *reg,
315+
struct io_uring_bpf __user *arg)
320316
{
321-
struct io_bpf_filters *filters, *old_filters = NULL;
322-
struct io_bpf_filter *filter, *old_filter;
323-
struct io_uring_bpf reg;
324-
struct bpf_prog *prog;
325-
struct sock_fprog fprog;
317+
const struct io_issue_def *def;
326318
int ret;
327319

328-
if (copy_from_user(&reg, arg, sizeof(reg)))
320+
if (copy_from_user(reg, arg, sizeof(*reg)))
329321
return -EFAULT;
330-
if (reg.cmd_type != IO_URING_BPF_CMD_FILTER)
322+
if (reg->cmd_type != IO_URING_BPF_CMD_FILTER)
331323
return -EINVAL;
332-
if (reg.cmd_flags || reg.resv)
324+
if (reg->cmd_flags || reg->resv)
333325
return -EINVAL;
334326

335-
if (reg.filter.opcode >= IORING_OP_LAST)
327+
if (reg->filter.opcode >= IORING_OP_LAST)
336328
return -EINVAL;
337-
if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
329+
if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
338330
return -EINVAL;
339-
if (reg.filter.resv)
331+
if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv)))
340332
return -EINVAL;
341-
if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2)))
333+
if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2)))
342334
return -EINVAL;
343-
if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS)
335+
if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS)
344336
return -EINVAL;
345337

338+
/* Verify filter size */
339+
def = &io_issue_defs[array_index_nospec(reg->filter.opcode, IORING_OP_LAST)];
340+
341+
/* same size, always ok */
342+
ret = 0;
343+
if (reg->filter.pdu_size == def->filter_pdu_size)
344+
;
345+
/* size differs, fail in strict mode */
346+
else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT)
347+
ret = -EMSGSIZE;
348+
/* userspace filter is bigger, always disallow */
349+
else if (reg->filter.pdu_size > def->filter_pdu_size)
350+
ret = -EMSGSIZE;
351+
352+
/* copy back kernel filter size */
353+
reg->filter.pdu_size = def->filter_pdu_size;
354+
if (copy_to_user(&arg->filter, &reg->filter, sizeof(reg->filter)))
355+
return -EFAULT;
356+
357+
return ret;
358+
}
359+
360+
int io_register_bpf_filter(struct io_restriction *res,
361+
struct io_uring_bpf __user *arg)
362+
{
363+
struct io_bpf_filters *filters, *old_filters = NULL;
364+
struct io_bpf_filter *filter, *old_filter;
365+
struct io_uring_bpf reg;
366+
struct bpf_prog *prog;
367+
struct sock_fprog fprog;
368+
int ret;
369+
370+
ret = io_bpf_filter_import(&reg, arg);
371+
if (ret)
372+
return ret;
373+
346374
fprog.len = reg.filter.filter_len;
347375
fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);
348376

io_uring/cancel.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@
66

77
struct io_cancel_data {
88
struct io_ring_ctx *ctx;
9-
union {
10-
u64 data;
11-
struct file *file;
12-
};
9+
u64 data;
10+
struct file *file;
1311
u8 opcode;
1412
u32 flags;
1513
int seq;

io_uring/cmd_net.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,16 +160,19 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
160160
struct proto *prot = READ_ONCE(sk->sk_prot);
161161
int ret, arg = 0;
162162

163-
if (!prot || !prot->ioctl)
164-
return -EOPNOTSUPP;
165-
166163
switch (cmd->cmd_op) {
167164
case SOCKET_URING_OP_SIOCINQ:
165+
if (!prot || !prot->ioctl)
166+
return -EOPNOTSUPP;
167+
168168
ret = prot->ioctl(sk, SIOCINQ, &arg);
169169
if (ret)
170170
return ret;
171171
return arg;
172172
case SOCKET_URING_OP_SIOCOUTQ:
173+
if (!prot || !prot->ioctl)
174+
return -EOPNOTSUPP;
175+
173176
ret = prot->ioctl(sk, SIOCOUTQ, &arg);
174177
if (ret)
175178
return ret;

io_uring/filetable.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
2222
if (!table->bitmap)
2323
return -ENFILE;
2424

25+
if (table->alloc_hint < ctx->file_alloc_start ||
26+
table->alloc_hint >= ctx->file_alloc_end)
27+
table->alloc_hint = ctx->file_alloc_start;
28+
2529
do {
2630
ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
2731
if (ret != nr)

io_uring/io_uring.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@
119119
static void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags);
120120
static void __io_req_caches_free(struct io_ring_ctx *ctx);
121121

122-
static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
122+
static __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(io_key_has_sqarray, HZ);
123123

124124
struct kmem_cache *req_cachep;
125125
static struct workqueue_struct *iou_wq __ro_after_init;
@@ -1978,7 +1978,7 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
19781978
unsigned mask = ctx->sq_entries - 1;
19791979
unsigned head = ctx->cached_sq_head++ & mask;
19801980

1981-
if (static_branch_unlikely(&io_key_has_sqarray) &&
1981+
if (static_branch_unlikely(&io_key_has_sqarray.key) &&
19821982
(!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
19831983
head = READ_ONCE(ctx->sq_array[head]);
19841984
if (unlikely(head >= ctx->sq_entries)) {
@@ -2173,7 +2173,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
21732173
io_rings_free(ctx);
21742174

21752175
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
2176-
static_branch_dec(&io_key_has_sqarray);
2176+
static_branch_slow_dec_deferred(&io_key_has_sqarray);
21772177

21782178
percpu_ref_exit(&ctx->refs);
21792179
free_uid(ctx->user);
@@ -2398,7 +2398,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
23982398
static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
23992399
{
24002400
unsigned long index;
2401-
struct creds *creds;
2401+
struct cred *creds;
24022402

24032403
mutex_lock(&ctx->uring_lock);
24042404
percpu_ref_kill(&ctx->refs);
@@ -2946,11 +2946,10 @@ static __cold int io_uring_create(struct io_ctx_config *config)
29462946
ctx->clock_offset = 0;
29472947

29482948
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
2949-
static_branch_inc(&io_key_has_sqarray);
2949+
static_branch_deferred_inc(&io_key_has_sqarray);
29502950

29512951
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
2952-
!(ctx->flags & IORING_SETUP_IOPOLL) &&
2953-
!(ctx->flags & IORING_SETUP_SQPOLL))
2952+
!(ctx->flags & IORING_SETUP_IOPOLL))
29542953
ctx->task_complete = true;
29552954

29562955
if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))

io_uring/net.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,8 +1493,6 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
14931493
return -EAGAIN;
14941494

14951495
if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
1496-
zc->len -= ret;
1497-
zc->buf += ret;
14981496
zc->done_io += ret;
14991497
return -EAGAIN;
15001498
}

io_uring/opdef.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,10 @@ const struct io_issue_def io_issue_defs[] = {
221221
.issue = io_fallocate,
222222
},
223223
[IORING_OP_OPENAT] = {
224+
.filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, open),
224225
.prep = io_openat_prep,
225226
.issue = io_openat,
227+
.filter_populate = io_openat_bpf_populate,
226228
},
227229
[IORING_OP_CLOSE] = {
228230
.prep = io_close_prep,
@@ -309,8 +311,10 @@ const struct io_issue_def io_issue_defs[] = {
309311
#endif
310312
},
311313
[IORING_OP_OPENAT2] = {
314+
.filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, open),
312315
.prep = io_openat2_prep,
313316
.issue = io_openat2,
317+
.filter_populate = io_openat_bpf_populate,
314318
},
315319
[IORING_OP_EPOLL_CTL] = {
316320
.unbound_nonreg_file = 1,
@@ -406,8 +410,10 @@ const struct io_issue_def io_issue_defs[] = {
406410
[IORING_OP_SOCKET] = {
407411
.audit_skip = 1,
408412
#if defined(CONFIG_NET)
413+
.filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, socket),
409414
.prep = io_socket_prep,
410415
.issue = io_socket,
416+
.filter_populate = io_socket_bpf_populate,
411417
#else
412418
.prep = io_eopnotsupp_prep,
413419
#endif

0 commit comments

Comments
 (0)