From cfe5defd025b79ade5f9243a7d6ebaad176cb6ad Mon Sep 17 00:00:00 2001 From: Pat Tullmann Date: Wed, 2 Apr 2025 16:14:50 -0700 Subject: [PATCH 1/2] linux: futex v1 API cleanup * Use `packed struct` for flags arguments. So, instead of `linux.FUTEX.WAIT` use `.{ .cmd = .WAIT, .private = true }` * Rename `futex_wait` and `futex_wake`, which didn't actually specify wait/wake, to `futex_3arg` and `futex_4arg` (it's the number of parameters that differs; the `op` is whatever is specified). * Expose the full six-arg flavor of the syscall (for some of the advanced ops), and add packed structs for their arguments. * Use an `extern union` to support the 4th parameter which is sometimes a `timespec` pointer, and sometimes a `u32`. * Add tests that make sure the structure layout is correct and that the basic argument passing is working (no actual futexes are contended). --- lib/std/Thread.zig | 6 +- lib/std/Thread/Futex.zig | 16 +++--- lib/std/os/linux.zig | 116 +++++++++++++++++++++++++++++--------- lib/std/os/linux/test.zig | 90 +++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+), 38 deletions(-) diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig index fe3bf0fcea..bb46bd3f24 100644 --- a/lib/std/Thread.zig +++ b/lib/std/Thread.zig @@ -1539,10 +1539,10 @@ const LinuxThreadImpl = struct { continue; } - switch (linux.E.init(linux.futex_wait( + switch (linux.E.init(linux.futex_4arg( &self.thread.child_tid.raw, - linux.FUTEX.WAIT, - tid, + .{ .cmd = .WAIT, .private = false }, + @bitCast(tid), null, ))) { .SUCCESS => continue, diff --git a/lib/std/Thread/Futex.zig b/lib/std/Thread/Futex.zig index 5e942924c3..aecf646424 100644 --- a/lib/std/Thread/Futex.zig +++ b/lib/std/Thread/Futex.zig @@ -262,10 +262,10 @@ const LinuxImpl = struct { ts.nsec = @as(@TypeOf(ts.nsec), @intCast(timeout_ns % std.time.ns_per_s)); } - const rc = linux.futex_wait( - @as(*const i32, @ptrCast(&ptr.raw)), - linux.FUTEX.PRIVATE_FLAG | linux.FUTEX.WAIT, - @as(i32, @bitCast(expect)), + const
rc = linux.futex_4arg( + &ptr.raw, + .{ .cmd = .WAIT, .private = true }, + expect, if (timeout != null) &ts else null, ); @@ -284,10 +284,10 @@ const LinuxImpl = struct { } fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void { - const rc = linux.futex_wake( - @as(*const i32, @ptrCast(&ptr.raw)), - linux.FUTEX.PRIVATE_FLAG | linux.FUTEX.WAKE, - std.math.cast(i32, max_waiters) orelse std.math.maxInt(i32), + const rc = linux.futex_3arg( + &ptr.raw, + .{ .cmd = .WAKE, .private = true }, + @min(max_waiters, std.math.maxInt(i32)), ); switch (linux.E.init(rc)) { diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 32eb8d186d..17bba6aa5c 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -673,12 +673,34 @@ pub fn fallocate(fd: i32, mode: i32, offset: i64, length: i64) usize { } } -pub fn futex_wait(uaddr: *const i32, futex_op: u32, val: i32, timeout: ?*const timespec) usize { - return syscall4(.futex, @intFromPtr(uaddr), futex_op, @as(u32, @bitCast(val)), @intFromPtr(timeout)); +// The 4th parameter to the v1 futex syscall can either be an optional +// pointer to a timespec, or a uint32, depending on which "op" is being +// performed. +pub const futex_param4 = extern union { + timeout: ?*const timespec, + /// On all platforms only the bottom 32-bits of `val2` are relevant. + /// This is pointer-sized (`usize`) to match the pointer in the union. + val2: usize, +}; + +/// The futex v1 syscall, see also the newer futex2_{wait,wake,requeue,waitv} syscalls. +/// +/// The futex_op parameter is a sub-command and flags. The sub-command +/// defines which of the subsequent parameters are relevant.
+pub fn futex(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, val2timeout: futex_param4, uaddr2: ?*const anyopaque, val3: u32) usize { + return syscall6(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val, @intFromPtr(val2timeout.timeout), @intFromPtr(uaddr2), val3); } -pub fn futex_wake(uaddr: *const i32, futex_op: u32, val: i32) usize { - return syscall3(.futex, @intFromPtr(uaddr), futex_op, @as(u32, @bitCast(val))); +/// Three-argument variation of the v1 futex call. Only suitable for a +/// futex_op that ignores the remaining arguments (e.g., FUTEX_OP.WAKE). +pub fn futex_3arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32) usize { + return syscall3(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val); +} + +/// Four-argument variation on the v1 futex call. Only suitable for a +/// futex_op that ignores the remaining arguments (e.g., FUTEX_OP.WAIT). +pub fn futex_4arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, timeout: ?*const timespec) usize { + return syscall4(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val, @intFromPtr(timeout)); } /// Given an array of `futex_waitv`, wait on each uaddr. @@ -3385,29 +3407,6 @@ pub const FALLOC = struct { pub const FL_UNSHARE_RANGE = 0x40; }; -pub const FUTEX = struct { - pub const WAIT = 0; - pub const WAKE = 1; - pub const FD = 2; - pub const REQUEUE = 3; - pub const CMP_REQUEUE = 4; - pub const WAKE_OP = 5; - pub const LOCK_PI = 6; - pub const UNLOCK_PI = 7; - pub const TRYLOCK_PI = 8; - pub const WAIT_BITSET = 9; - pub const WAKE_BITSET = 10; - pub const WAIT_REQUEUE_PI = 11; - pub const CMP_REQUEUE_PI = 12; - - pub const PRIVATE_FLAG = 128; - - pub const CLOCK_REALTIME = 256; - - /// Max numbers of elements in a `futex_waitv` array.
- pub const WAITV_MAX = 128; -}; - pub const FUTEX2 = struct { pub const SIZE_U8 = 0x00; pub const SIZE_U16 = 0x01; @@ -3418,6 +3417,69 @@ pub const FUTEX2 = struct { pub const PRIVATE = FUTEX.PRIVATE_FLAG; }; +// Futex v1 API commands. See futex man page for each command's +// interpretation of the futex arguments. +pub const FUTEX_COMMAND = enum(u7) { + WAIT = 0, + WAKE = 1, + FD = 2, + REQUEUE = 3, + CMP_REQUEUE = 4, + WAKE_OP = 5, + LOCK_PI = 6, + UNLOCK_PI = 7, + TRYLOCK_PI = 8, + WAIT_BITSET = 9, + WAKE_BITSET = 10, + WAIT_REQUEUE_PI = 11, + CMP_REQUEUE_PI = 12, +}; + +/// Futex v1 API command and flags for the `futex_op` parameter +pub const FUTEX_OP = packed struct(u32) { + cmd: FUTEX_COMMAND, + private: bool, + realtime: bool = false, // realtime clock vs. monotonic clock + _reserved: u23 = 0, +}; + +/// Futex v1 FUTEX_WAKE_OP `val3` operation: +pub const FUTEX_WAKE_OP = packed struct(u32) { + cmd: FUTEX_WAKE_OP_CMD, + /// From C API `FUTEX_OP_ARG_SHIFT`: Use (1 << oparg) as operand + arg_shift: bool = false, + cmp: FUTEX_WAKE_OP_CMP, + oparg: u12, + cmdarg: u12, +}; + +/// Futex v1 cmd for FUTEX_WAKE_OP `val3` command. +pub const FUTEX_WAKE_OP_CMD = enum(u3) { + /// uaddr2 = oparg + SET = 0, + /// uaddr2 += oparg + ADD = 1, + /// uaddr2 |= oparg + OR = 2, + /// uaddr2 &= ~oparg + ANDN = 3, + /// uaddr2 ^= oparg + XOR = 4, +}; + +/// Futex v1 comparison op for FUTEX_WAKE_OP `val3` cmp +pub const FUTEX_WAKE_OP_CMP = enum(u4) { + EQ = 0, + NE = 1, + LT = 2, + LE = 3, + GT = 4, + GE = 5, +}; + +/// Max numbers of elements in a `futex_waitv` array. 
+pub const FUTEX2_WAITV_MAX = 128; + pub const PROT = struct { /// page can not be accessed pub const NONE = 0x0; diff --git a/lib/std/os/linux/test.zig b/lib/std/os/linux/test.zig index 04702903ef..96d31246e1 100644 --- a/lib/std/os/linux/test.zig +++ b/lib/std/os/linux/test.zig @@ -207,6 +207,96 @@ test "sysinfo" { try expect(info.mem_unit <= std.heap.page_size_max); } +comptime { + std.debug.assert(128 == @as(u32, @bitCast(linux.FUTEX_OP{ .cmd = @enumFromInt(0), .private = true, .realtime = false }))); + std.debug.assert(256 == @as(u32, @bitCast(linux.FUTEX_OP{ .cmd = @enumFromInt(0), .private = false, .realtime = true }))); + + // Check futex_param4 union is packed correctly + const param_union = linux.futex_param4{ + .val2 = 0xaabbcc, + }; + std.debug.assert(@intFromPtr(param_union.timeout) == 0xaabbcc); +} + +test "futex v1" { + var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1); + var rc: usize = 0; + + // No-op wait, lock value is not expected value + rc = linux.futex(&lock.raw, .{ .cmd = .WAIT, .private = true }, 2, .{ .timeout = null }, null, 0); + try expectEqual(.AGAIN, linux.E.init(rc)); + + rc = linux.futex_4arg(&lock.raw, .{ .cmd = .WAIT, .private = true }, 2, null); + try expectEqual(.AGAIN, linux.E.init(rc)); + + // Short-fuse wait, timeout kicks in + rc = linux.futex(&lock.raw, .{ .cmd = .WAIT, .private = true }, 1, .{ .timeout = &.{ .sec = 0, .nsec = 2 } }, null, 0); + try expectEqual(.TIMEDOUT, linux.E.init(rc)); + + rc = linux.futex_4arg(&lock.raw, .{ .cmd = .WAIT, .private = true }, 1, &.{ .sec = 0, .nsec = 2 }); + try expectEqual(.TIMEDOUT, linux.E.init(rc)); + + // Wakeup (no waiters) + rc = linux.futex(&lock.raw, .{ .cmd = .WAKE, .private = true }, 2, .{ .timeout = null }, null, 0); + try expectEqual(0, rc); + + rc = linux.futex_3arg(&lock.raw, .{ .cmd = .WAKE, .private = true }, 2); + try expectEqual(0, rc); + + // CMP_REQUEUE - val3 mismatch + rc = linux.futex(&lock.raw, .{ .cmd = .CMP_REQUEUE, .private = true }, 2, .{ .val2 
= 0 }, null, 99); + try expectEqual(.AGAIN, linux.E.init(rc)); + + // CMP_REQUEUE - requeue (but no waiters, so ... not much) + { + const val3 = 1; + const wake_nr = 3; + const requeue_max = std.math.maxInt(u31); + var target_lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1); + rc = linux.futex(&lock.raw, .{ .cmd = .CMP_REQUEUE, .private = true }, wake_nr, .{ .val2 = requeue_max }, &target_lock.raw, val3); + try expectEqual(0, rc); + } + + // WAKE_OP - just to see if we can construct the arguments ... + { + var lock2: std.atomic.Value(u32) = std.atomic.Value(u32).init(1); + const wake1_nr = 2; + const wake2_nr = 3; + const wake_op = linux.FUTEX_WAKE_OP{ + .cmd = .ANDN, + .arg_shift = true, + .cmp = .LT, + .oparg = 4, + .cmdarg = 5, + }; + + rc = linux.futex(&lock.raw, .{ .cmd = .WAKE_OP, .private = true }, wake1_nr, .{ .val2 = wake2_nr }, &lock2.raw, @bitCast(wake_op)); + try expectEqual(0, rc); + } + + // WAIT_BITSET + { + // val1 return early + rc = linux.futex(&lock.raw, .{ .cmd = .WAIT_BITSET, .private = true }, 2, .{ .timeout = null }, null, 0xfff); + try expectEqual(.AGAIN, linux.E.init(rc)); + + // timeout wait + const timeout: linux.timespec = .{ .sec = 0, .nsec = 2 }; + rc = linux.futex(&lock.raw, .{ .cmd = .WAIT_BITSET, .private = true }, 1, .{ .timeout = &timeout }, null, 0xfff); + try expectEqual(.TIMEDOUT, linux.E.init(rc)); + } + + // WAKE_BITSET + { + rc = linux.futex(&lock.raw, .{ .cmd = .WAKE_BITSET, .private = true }, 2, .{ .timeout = null }, null, 0xfff000); + try expectEqual(0, rc); + + // bitmask must have at least 1 bit set: + rc = linux.futex(&lock.raw, .{ .cmd = .WAKE_BITSET, .private = true }, 2, .{ .timeout = null }, null, 0); + try expectEqual(.INVAL, linux.E.init(rc)); + } +} + test { _ = linux.IoUring; } From 89d15a8d47fdfe41ae650e399d258de3184e6b4d Mon Sep 17 00:00:00 2001 From: Pat Tullmann Date: Wed, 2 Apr 2025 16:21:23 -0700 Subject: [PATCH 2/2] linux: futex v2 API updates * `futex2_waitv` always takes a 64-bit timespec. 
Perhaps the `kernel_timespec` should be renamed `timespec64`? It's used in io_uring, too. * Add `packed struct` for futex v2 flags and parameters. * Add very basic "tests" for the futex v2 syscalls (just to ensure the code compiles). * Update the stale or broken comments. (I could also just delete these; they're not really documenting Zig-specific behavior.) Given that the futex2 APIs are not used by Zig's library (they're a bit too new), and the fact that these are very specialized syscalls, and they currently provide no benefit over the existing v1 API, I wonder if instead of fixing these up, we should just replace them with a stub that says 'use a 3rd party library'. --- lib/std/os/linux.zig | 156 ++++++++++++++++++++++---------------- lib/std/os/linux/test.zig | 130 +++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+), 67 deletions(-) diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 17bba6aa5c..f69fc8f348 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -703,15 +703,13 @@ pub fn futex_4arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, timeout return syscall4(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val, @intFromPtr(timeout)); } -/// Given an array of `futex_waitv`, wait on each uaddr. +/// Given an array of `futex2_waitone`, wait on each uaddr. /// The thread wakes if a futex_wake() is performed at any uaddr. -/// The syscall returns immediately if any waiter has *uaddr != val. -/// timeout is an optional timeout value for the operation. -/// Each waiter has individual flags. -/// The `flags` argument for the syscall should be used solely for specifying -/// the timeout as realtime, if needed. -/// Flags for private futexes, sizes, etc. should be used on the -/// individual flags of each waiter. +/// The syscall returns immediately if any futex has *uaddr != val. +/// timeout is an optional, absolute timeout value for the operation.
+/// The `flags` argument is for future use and currently should be `.{}`. +/// Flags for private futexes, sizes, etc. should be set on the +/// individual flags of each `futex2_waitone`. /// /// Returns the array index of one of the woken futexes. /// No further information is provided: any number of other futexes may also @@ -719,42 +717,43 @@ pub fn futex_4arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, timeout /// the returned index may refer to any one of them. /// (It is not necessaryily the futex with the smallest index, nor the one /// most recently woken, nor...) +/// +/// Requires at least kernel v5.16. pub fn futex2_waitv( - /// List of futexes to wait on. - waiters: [*]futex_waitv, - /// Length of `waiters`. + futexes: [*]const futex2_waitone, + /// Length of `futexes`. Max of FUTEX2_WAITONE_MAX. nr_futexes: u32, - /// Flag for timeout (monotonic/realtime). - flags: u32, - /// Optional absolute timeout. - timeout: ?*const timespec, + flags: FUTEX2_FLAGS_WAITV, + /// Optional absolute timeout. Always 64-bit, even on 32-bit platforms. + timeout: ?*const kernel_timespec, /// Clock to be used for the timeout, realtime or monotonic. clockid: clockid_t, ) usize { return syscall5( .futex_waitv, - @intFromPtr(waiters), + @intFromPtr(futexes), nr_futexes, - flags, + @as(u32, @bitCast(flags)), @intFromPtr(timeout), - @bitCast(@as(isize, @intFromEnum(clockid))), + @intFromEnum(clockid), ); } -/// Wait on a futex. -/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the -/// futex2 familiy of calls. +/// Wait on a single futex. +/// Identical to the futex v1 `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the +/// futex2 family of calls. +/// +/// Requires at least kernel v6.7. pub fn futex2_wait( /// Address of the futex to wait on. uaddr: *const anyopaque, /// Value of `uaddr`. val: usize, - /// Bitmask. + /// Bitmask to match against incoming wakeup masks. Must not be zero. mask: usize, - /// `FUTEX2` flags. 
- flags: u32, - /// Optional absolute timeout. - timeout: ?*const timespec, + flags: FUTEX2_FLAGS, + /// Optional absolute timeout. Always 64-bit, even on 32-bit platforms. + timeout: ?*const kernel_timespec, /// Clock to be used for the timeout, realtime or monotonic. clockid: clockid_t, ) usize { @@ -763,52 +762,55 @@ pub fn futex2_wait( @intFromPtr(uaddr), val, mask, - flags, + @as(u32, @bitCast(flags)), @intFromPtr(timeout), - @bitCast(@as(isize, @intFromEnum(clockid))), + @intFromEnum(clockid), ); } -/// Wake a number of futexes. -/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the +/// Wake (subset of) waiters on given futex. +/// Identical to the traditional `FUTEX.FUTEX_WAKE_BITSET` op, except it is part of the /// futex2 family of calls. +/// +/// Requires at least kernel v6.7. pub fn futex2_wake( - /// Address of the futex(es) to wake. + /// Futex to wake uaddr: *const anyopaque, - /// Bitmask + /// Bitmask to match against waiters. mask: usize, - /// Number of the futexes to wake. - nr: i32, - /// `FUTEX2` flags. - flags: u32, + /// Maximum number of waiters on the futex to wake. + nr_wake: i32, + flags: FUTEX2_FLAGS, ) usize { return syscall4( .futex_wake, @intFromPtr(uaddr), mask, - @bitCast(@as(isize, nr)), - flags, + @as(u32, @bitCast(nr_wake)), + @as(u32, @bitCast(flags)), ); } -/// Requeue a waiter from one futex to another. +/// Wake and/or requeue waiter(s) from one futex to another. /// Identical to `FUTEX.CMP_REQUEUE`, except it is part of the futex2 family of calls. +/// +/// Requires at least kernel v6.7. pub fn futex2_requeue( - /// Array describing the source and destination futex. - waiters: [*]futex_waitv, - /// Unused. - flags: u32, - /// Number of futexes to wake. + /// The source and destination futexes. Must be a 2-element array. + waiters: [*]const futex2_waitone, + /// Currently unused. + flags: FUTEX2_FLAGS_REQUEUE, + /// Maximum number of waiters to wake on the source futex. 
nr_wake: i32, - /// Number of futexes to requeue. + /// Maximum number of waiters to transfer to the destination futex. nr_requeue: i32, ) usize { return syscall4( .futex_requeue, @intFromPtr(waiters), - flags, - @bitCast(@as(isize, nr_wake)), - @bitCast(@as(isize, nr_requeue)), + @as(u32, @bitCast(flags)), + @as(u32, @bitCast(nr_wake)), + @as(u32, @bitCast(nr_requeue)), ); } @@ -3407,16 +3409,6 @@ pub const FALLOC = struct { pub const FL_UNSHARE_RANGE = 0x40; }; -pub const FUTEX2 = struct { - pub const SIZE_U8 = 0x00; - pub const SIZE_U16 = 0x01; - pub const SIZE_U32 = 0x02; - pub const SIZE_U64 = 0x03; - pub const NUMA = 0x04; - - pub const PRIVATE = FUTEX.PRIVATE_FLAG; -}; - // Futex v1 API commands. See futex man page for each command's // interpretation of the futex arguments. pub const FUTEX_COMMAND = enum(u7) { @@ -3477,8 +3469,38 @@ pub const FUTEX_WAKE_OP_CMP = enum(u4) { GE = 5, }; -/// Max numbers of elements in a `futex_waitv` array. -pub const FUTEX2_WAITV_MAX = 128; +/// Max numbers of elements in a `futex2_waitone` array. +pub const FUTEX2_WAITONE_MAX = 128; + +/// For futex v2 API, the size of the futex at the uaddr. v1 futex are +/// always implicitly U32. As of kernel v6.14, only U32 is implemented +/// for v2 futexes. +pub const FUTEX2_SIZE = enum(u2) { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +/// As of kernel 6.14 there are no defined flags to futex2_waitv. +pub const FUTEX2_FLAGS_WAITV = packed struct(u32) { + _reserved: u32 = 0, +}; + +/// As of kernel 6.14 there are no defined flags to futex2_requeue. +pub const FUTEX2_FLAGS_REQUEUE = packed struct(u32) { + _reserved: u32 = 0, +}; + +/// Flags for futex v2 APIs (futex2_wait, futex2_wake, futex2_requeue, but +/// not the futex2_waitv syscall, but also used in the futex2_waitone struct). 
+pub const FUTEX2_FLAGS = packed struct(u32) { + size: FUTEX2_SIZE, + numa: bool = false, + _reserved: u4 = 0, + private: bool, + _undefined: u24 = 0, +}; pub const PROT = struct { /// page can not be accessed @@ -9343,17 +9365,17 @@ pub const PTRACE = struct { pub const GET_SYSCALL_INFO = 0x420e; }; -/// A waiter for vectorized wait. -pub const futex_waitv = extern struct { - // Expected value at uaddr +/// For futex2_waitv and futex2_requeue. Arrays of `futex2_waitone` allow +/// waiting on multiple futexes in one call. +pub const futex2_waitone = extern struct { + /// Expected value at uaddr, should match size of futex. val: u64, - /// User address to wait on. + /// User address to wait on. Top-bits must be 0 on 32-bit. uaddr: u64, /// Flags for this waiter. - flags: u32, + flags: FUTEX2_FLAGS, /// Reserved member to preserve alignment. - /// Should be 0. - __reserved: u32, + __reserved: u32 = 0, }; pub const cache_stat_range = extern struct { diff --git a/lib/std/os/linux/test.zig b/lib/std/os/linux/test.zig index 96d31246e1..e38687dbde 100644 --- a/lib/std/os/linux/test.zig +++ b/lib/std/os/linux/test.zig @@ -297,6 +297,136 @@ test "futex v1" { } } +comptime { + std.debug.assert(2 == @as(u32, @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = false }))); + std.debug.assert(128 == @as(u32, @bitCast(linux.FUTEX2_FLAGS{ .size = @enumFromInt(0), .private = true }))); +} + +test "futex2_waitv" { + const locks = [_]std.atomic.Value(u32){ + std.atomic.Value(u32).init(1), + std.atomic.Value(u32).init(1), + std.atomic.Value(u32).init(1), + }; + + const futexes = [_]linux.futex2_waitone{ + .{ + .val = 1, + .uaddr = @intFromPtr(&locks[0].raw), + .flags = .{ .size = .U32, .private = true }, + }, + .{ + .val = 1, + .uaddr = @intFromPtr(&locks[1].raw), + .flags = .{ .size = .U32, .private = true }, + }, + .{ + .val = 1, + .uaddr = @intFromPtr(&locks[2].raw), + .flags = .{ .size = .U32, .private = true }, + }, + }; + + const timeout = linux.kernel_timespec{ .sec = 0, 
.nsec = 2 }; // absolute timeout, so this is 1970... + const rc = linux.futex2_waitv(&futexes, futexes.len, .{}, &timeout, .MONOTONIC); + switch (linux.E.init(rc)) { + .NOSYS => return error.SkipZigTest, // futex2_waitv added in kernel v5.16 + else => |err| try expectEqual(.TIMEDOUT, err), + } +} + +// Futex v2 API is only supported on recent kernels (v6.7), so skip tests if the syscalls +// return ENOSYS. +fn futex2_skip_if_unsupported() !void { + const lock: u32 = 0; + const rc = linux.futex2_wake(&lock, 0, 1, .{ .size = .U32, .private = true }); + if (linux.E.init(rc) == .NOSYS) { + return error.SkipZigTest; + } +} + +test "futex2_wait" { + var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1); + var rc: usize = 0; + const mask = 0x1; + + try futex2_skip_if_unsupported(); + + // The API for 8,16,64 bit futexes is defined, but as of kernel v6.14 + // (at least) they're not implemented. + if (false) { + rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U8, .private = true }, null, .MONOTONIC); + try expectEqual(.INVAL, linux.E.init(rc)); + + rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U16, .private = true }, null, .MONOTONIC); + try expectEqual(.INVAL, linux.E.init(rc)); + + rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U64, .private = true }, null, .MONOTONIC); + try expectEqual(.INVAL, linux.E.init(rc)); + } + + const flags = linux.FUTEX2_FLAGS{ .size = .U32, .private = true }; + // no-wait, lock state mismatch + rc = linux.futex2_wait(&lock.raw, 2, mask, flags, null, .MONOTONIC); + try expectEqual(.AGAIN, linux.E.init(rc)); + + // hit timeout on wait + rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .MONOTONIC); + try expectEqual(.TIMEDOUT, linux.E.init(rc)); + + // timeout is absolute + { + var curr: linux.timespec = undefined; + rc = linux.clock_gettime(.MONOTONIC, &curr); // gettime() uses platform timespec + try expectEqual(0, rc); + + // ... 
but futex2_wait always uses 64-bit timespec + var timeout: linux.kernel_timespec = .{ + .sec = curr.sec, + .nsec = curr.nsec + 2, + }; + rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &timeout, .MONOTONIC); + try expectEqual(.TIMEDOUT, linux.E.init(rc)); + } + + rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .REALTIME); + try expectEqual(.TIMEDOUT, linux.E.init(rc)); +} + +test "futex2_wake" { + var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1); + + try futex2_skip_if_unsupported(); + + const rc = linux.futex2_wake(&lock.raw, 0xFF, 1, .{ .size = .U32, .private = true }); + try expectEqual(0, rc); +} + +test "futex2_requeue" { + try futex2_skip_if_unsupported(); + + const locks = [_]std.atomic.Value(u32){ + std.atomic.Value(u32).init(1), + std.atomic.Value(u32).init(1), + }; + + const futexes = [_]linux.futex2_waitone{ + .{ + .val = 1, + .uaddr = @intFromPtr(&locks[0].raw), + .flags = .{ .size = .U32, .private = true }, + }, + .{ + .val = 1, + .uaddr = @intFromPtr(&locks[1].raw), + .flags = .{ .size = .U32, .private = true }, + }, + }; + + const rc = linux.futex2_requeue(&futexes, .{}, 2, 2); + try expectEqual(0, rc); +} + test { _ = linux.IoUring; }