From cca021c211f5d962c2999becaf12ccc7499ccb1f Mon Sep 17 00:00:00 2001 From: Stephen Gregoratto Date: Mon, 15 Jan 2024 11:31:34 +1100 Subject: [PATCH 1/2] Linux: Update syscalls for the 6.7 release This release adds the rest of the futex2[1] syscalls, along with shadow stack[2] support for more architectures. [1]: https://lwn.net/Articles/940944/ [2]: https://lwn.net/Articles/926649/ --- lib/std/os/linux/syscalls.zig | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/lib/std/os/linux/syscalls.zig b/lib/std/os/linux/syscalls.zig index b584f2bd17..a246b83d55 100644 --- a/lib/std/os/linux/syscalls.zig +++ b/lib/std/os/linux/syscalls.zig @@ -444,6 +444,10 @@ pub const X86 = enum(usize) { set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, + map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, }; pub const X64 = enum(usize) { @@ -812,6 +816,9 @@ pub const X64 = enum(usize) { cachestat = 451, fchmodat2 = 452, map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, }; pub const Arm = enum(usize) { @@ -1222,6 +1229,10 @@ pub const Arm = enum(usize) { set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, + map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, breakpoint = arm_base + 1, cacheflush = arm_base + 2, @@ -1616,6 +1627,10 @@ pub const Sparc64 = enum(usize) { set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, + map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, }; pub const Mips = enum(usize) { @@ -2041,6 +2056,10 @@ pub const Mips = enum(usize) { set_mempolicy_home_node = Linux + 450, cachestat = Linux + 451, fchmodat2 = Linux + 452, + map_shadow_stack = Linux + 453, + futex_wake = Linux + 454, + futex_wait = Linux + 455, + futex_requeue = Linux + 456, }; pub const Mips64 = enum(usize) { @@ -2402,6 +2421,10 @@ pub const Mips64 = enum(usize) { 
set_mempolicy_home_node = Linux + 450, cachestat = Linux + 451, fchmodat2 = Linux + 452, + map_shadow_stack = Linux + 453, + futex_wake = Linux + 454, + futex_wait = Linux + 455, + futex_requeue = Linux + 456, }; pub const PowerPC = enum(usize) { @@ -2838,6 +2861,10 @@ pub const PowerPC = enum(usize) { set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, + map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, }; pub const PowerPC64 = enum(usize) { @@ -3246,6 +3273,10 @@ pub const PowerPC64 = enum(usize) { set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, + map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, }; pub const Arm64 = enum(usize) { @@ -3557,6 +3588,10 @@ pub const Arm64 = enum(usize) { set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, + map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, }; pub const RiscV64 = enum(usize) { @@ -3869,6 +3904,10 @@ pub const RiscV64 = enum(usize) { set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, + map_shadow_stack = 453, + futex_wake = 454, + futex_wait = 455, + futex_requeue = 456, riscv_flush_icache = arch_specific_syscall + 15, }; From 3200fae9c51a57d6b9e15e04130aa317fea1251b Mon Sep 17 00:00:00 2001 From: Stephen Gregoratto Date: Mon, 15 Jan 2024 17:59:23 +1100 Subject: [PATCH 2/2] Linux: Add syscall bindings, enhance documentation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add syscall bindings/structures for the `futex2` family. The documentation is taken from the syscall definitions. - Add documentation for the `cachestat` bindings and structures. Taken from work I did in Cosmopolitan libc. - Add binding for `map_shadow_stack`. No documentation for this one, since the kernel devs didn't bother to do it ¯\_(ツ)_/¯. 
--- lib/std/os/linux.zig | 157 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index d6e539e98c..ee5fe0f186 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -323,6 +323,113 @@ pub fn futex_wake(uaddr: *const i32, futex_op: u32, val: i32) usize { return syscall3(.futex, @intFromPtr(uaddr), futex_op, @as(u32, @bitCast(val))); } +/// Given an array of `futex_waitv`, wait on each uaddr. +/// The thread wakes if a futex_wake() is performed at any uaddr. +/// The syscall returns immediately if any waiter has *uaddr != val. +/// timeout is an optional timeout value for the operation. +/// Each waiter has individual flags. +/// The `flags` argument for the syscall should be used solely for specifying +/// the timeout as realtime, if needed. +/// Flags for private futexes, sizes, etc. should be used on the +/// individual flags of each waiter. +/// +/// Returns the array index of one of the woken futexes. +/// No further information is provided: any number of other futexes may also +/// have been woken by the same event, and if more than one futex was woken, +/// the returned index may refer to any one of them. +/// (It is not necessarily the futex with the smallest index, nor the one +/// most recently woken, nor...) +pub fn futex2_waitv( + /// List of futexes to wait on. + waiters: [*]futex_waitv, + /// Length of `waiters`. + nr_futexes: u32, + /// Flag for timeout (monotonic/realtime). + flags: u32, + /// Optional absolute timeout. + timeout: ?*const timespec, + /// Clock to be used for the timeout, realtime or monotonic. + clockid: i32, +) usize { + return syscall5( + .futex_waitv, + @intFromPtr(waiters), + nr_futexes, + flags, + @intFromPtr(timeout), + @bitCast(@as(isize, clockid)), + ); +} + +/// Wait on a futex. +/// Identical to `FUTEX.WAIT_BITSET`, except it is part of the futex2 family of calls. +pub fn futex2_wait( + /// Address of the futex to wait on. 
+ uaddr: *const anyopaque, + /// Expected value at `uaddr`. + val: usize, + /// Bitmask. + mask: usize, + /// `FUTEX2` flags. + flags: u32, + /// Optional absolute timeout. + timeout: ?*const timespec, + /// Clock to be used for the timeout, realtime or monotonic. + clockid: i32, +) usize { + return syscall6( + .futex_wait, + @intFromPtr(uaddr), + val, + mask, + flags, + @intFromPtr(timeout), + @bitCast(@as(isize, clockid)), + ); +} + +/// Wake a number of futexes. +/// Identical to `FUTEX.WAKE_BITSET`, except it is part of the futex2 family of calls. +pub fn futex2_wake( + /// Address of the futex(es) to wake. + uaddr: *const anyopaque, + /// Bitmask. + mask: usize, + /// Number of the futexes to wake. + nr: i32, + /// `FUTEX2` flags. + flags: u32, +) usize { + return syscall4( + .futex_wake, + @intFromPtr(uaddr), + mask, + @bitCast(@as(isize, nr)), + flags, + ); +} + +/// Requeue a waiter from one futex to another. +/// Identical to `FUTEX.CMP_REQUEUE`, except it is part of the futex2 family of calls. +pub fn futex2_requeue( + /// Array describing the source and destination futex. + waiters: [*]futex_waitv, + /// Unused. + flags: u32, + /// Number of futexes to wake. + nr_wake: i32, + /// Number of futexes to requeue. + nr_requeue: i32, +) usize { + return syscall4( + .futex_requeue, + @intFromPtr(waiters), + flags, + @bitCast(@as(isize, nr_wake)), + @bitCast(@as(isize, nr_requeue)), + ); +} + pub fn getcwd(buf: [*]u8, size: usize) usize { return syscall2(.getcwd, @intFromPtr(buf), size); } @@ -1901,10 +2008,17 @@ pub fn ptrace( ); } +/// Query the page cache statistics of a file. pub fn cachestat( + /// The open file descriptor to retrieve statistics from. fd: fd_t, + /// The byte range in `fd` to query. + /// When `len > 0`, the range is `[off..off + len]`. + /// When `len == 0`, the range is from `off` to the end of `fd`. cstat_range: *const cache_stat_range, + /// The structure where page cache statistics are stored. 
+ cstat: *cache_stat, + /// Currently unused, and must be set to `0`. flags: u32, ) usize { return syscall4( @@ -1916,6 +2030,10 @@ pub fn cachestat( ); } +pub fn map_shadow_stack(addr: u64, size: u64, flags: u32) usize { + return syscall3(.map_shadow_stack, addr, size, flags); +} + pub const E = switch (native_arch) { .mips, .mipsel => @import("linux/errno/mips.zig").E, .sparc, .sparcel, .sparc64 => @import("linux/errno/sparc.zig").E, @@ -2016,6 +2134,19 @@ pub const FUTEX = struct { pub const PRIVATE_FLAG = 128; pub const CLOCK_REALTIME = 256; + + /// Max number of elements in a `futex_waitv` array. + pub const WAITV_MAX = 128; +}; + +pub const FUTEX2 = struct { + pub const SIZE_U8 = 0x00; + pub const SIZE_U16 = 0x01; + pub const SIZE_U32 = 0x02; + pub const SIZE_U64 = 0x03; + pub const NUMA = 0x04; + + pub const PRIVATE = FUTEX.PRIVATE_FLAG; }; pub const PROT = struct { @@ -6100,15 +6231,41 @@ pub const PTRACE = struct { pub const GET_SYSCALL_INFO = 0x420e; }; +/// A waiter for vectorized wait. +pub const futex_waitv = extern struct { + /// Expected value at uaddr. + val: u64, + /// User address to wait on. + uaddr: u64, + /// Flags for this waiter. + flags: u32, + /// Reserved member to preserve alignment. + /// Should be 0. + __reserved: u32, +}; + pub const cache_stat_range = extern struct { off: u64, len: u64, }; pub const cache_stat = extern struct { + /// Number of cached pages. cache: u64, + /// Number of dirty pages. dirty: u64, + /// Number of pages marked for writeback. writeback: u64, + /// Number of pages evicted from the cache. evicted: u64, + /// Number of recently evicted pages. + /// A page is recently evicted if its last eviction was recent enough that its + /// reentry to the cache would indicate that it is actively being used by the + /// system, and that there is memory pressure on the system. recently_evicted: u64, }; + +pub const SHADOW_STACK = struct { + /// Set up a restore token in the shadow stack. + pub const SET_TOKEN: u64 = 1 << 0; +};