mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 12:59:04 +00:00
Merge pull request #10717 from gh-fork-dump/seccomp-bits
Add Seccomp bits for linux
This commit is contained in:
commit
6d48600ea0
@ -1482,6 +1482,12 @@ pub const EM = enum(u16) {
|
||||
/// Linux kernel bpf virtual machine
|
||||
_BPF = 247,
|
||||
|
||||
/// C-SKY
|
||||
_CSKY = 252,
|
||||
|
||||
/// Fujitsu FR-V
|
||||
_FRV = 0x5441,
|
||||
|
||||
_,
|
||||
|
||||
pub fn toTargetCpuArch(em: EM) ?std.Target.Cpu.Arch {
|
||||
|
||||
@ -91,6 +91,7 @@ pub const tls = @import("linux/tls.zig");
|
||||
pub const pie = @import("linux/start_pie.zig");
|
||||
pub const BPF = @import("linux/bpf.zig");
|
||||
pub const IOCTL = @import("linux/ioctl.zig");
|
||||
pub const SECCOMP = @import("linux/seccomp.zig");
|
||||
|
||||
pub const MAP = struct {
|
||||
pub usingnamespace arch_bits.MAP;
|
||||
@ -1691,6 +1692,10 @@ pub fn perf_event_open(
|
||||
);
|
||||
}
|
||||
|
||||
/// Thin wrapper around the seccomp(2) system call.
///
/// `operation` selects the command, `flags` modifies it, and `args` is an
/// optional pointer to the operation's argument, which is handed to the
/// kernel as an integer address. The raw `syscall3` result is returned
/// unchanged.
pub fn seccomp(operation: u32, flags: u32, args: ?*const anyopaque) usize {
    return syscall3(.seccomp, operation, flags, @ptrToInt(args));
}
|
||||
|
||||
pub const E = switch (native_arch) {
|
||||
.mips, .mipsel => @import("linux/errno/mips.zig").E,
|
||||
.sparc, .sparcel, .sparcv9 => @import("linux/errno/sparc.zig").E,
|
||||
@ -5409,3 +5414,55 @@ pub const PERF = struct {
|
||||
|
||||
pub const IOC_FLAG_GROUP = 1;
|
||||
};
|
||||
|
||||
// TODO: Add the rest of the AUDIT defines?
/// Constants for the Linux audit subsystem.
pub const AUDIT = struct {
    /// Architecture identifiers. Each value is an ELF machine number with
    /// `_LE` OR'ed in for little-endian targets and `_64BIT` OR'ed in for
    /// targets with 64-bit pointers (see `toAudit`).
    pub const ARCH = enum(u32) {
        const _64BIT = 0x80000000;
        const _LE = 0x40000000;

        /// The entry matching the architecture being compiled for.
        ///
        /// Explicitly typed as `ARCH` so the value can be used directly as an
        /// enum value (e.g. with `@enumToInt`) instead of being a bare enum
        /// literal.
        ///
        /// Architectures that are not listed resolve to `undefined`.
        /// NOTE(review): a `@compileError` may be preferable here; kept as
        /// `undefined` so existing builds on unlisted targets are unaffected.
        pub const current: ARCH = switch (native_arch) {
            .i386 => .I386,
            .x86_64 => .X86_64,
            .aarch64 => .AARCH64,
            .arm, .thumb => .ARM,
            .riscv64 => .RISCV64,
            .sparcv9 => .SPARC64,
            .mips => .MIPS,
            .mipsel => .MIPSEL,
            .powerpc => .PPC,
            .powerpc64 => .PPC64,
            .powerpc64le => .PPC64LE,
            else => undefined,
        };

        AARCH64 = toAudit(.aarch64),
        ARM = toAudit(.arm),
        ARMEB = toAudit(.armeb),
        CSKY = toAudit(.csky),
        // Taken straight from the ELF machine number; no flag bits are added.
        HEXAGON = @enumToInt(std.elf.EM._HEXAGON),
        I386 = toAudit(.i386),
        M68K = toAudit(.m68k),
        MIPS = toAudit(.mips),
        // The little-endian MIPS entries reuse the `.mips`/`.mips64` base
        // value and set `_LE` explicitly.
        MIPSEL = toAudit(.mips) | _LE,
        MIPS64 = toAudit(.mips64),
        MIPSEL64 = toAudit(.mips64) | _LE,
        PPC = toAudit(.powerpc),
        PPC64 = toAudit(.powerpc64),
        PPC64LE = toAudit(.powerpc64le),
        RISCV32 = toAudit(.riscv32),
        RISCV64 = toAudit(.riscv64),
        S390X = toAudit(.s390x),
        SPARC = toAudit(.sparc),
        SPARC64 = toAudit(.sparcv9),
        X86_64 = toAudit(.x86_64),

        /// Builds the identifier for `arch`: its ELF machine number, plus
        /// `_LE` when the architecture is little-endian and `_64BIT` when its
        /// pointer width is 64 bits.
        fn toAudit(arch: std.Target.Cpu.Arch) u32 {
            var res: u32 = @enumToInt(arch.toElfMachine());
            if (arch.endian() == .Little) res |= _LE;
            if (arch.ptrBitWidth() == 64) res |= _64BIT;

            return res;
        }
    };
};
|
||||
|
||||
212
lib/std/os/linux/seccomp.zig
Normal file
212
lib/std/os/linux/seccomp.zig
Normal file
@ -0,0 +1,212 @@
|
||||
//! API bits for the Secure Computing facility in the Linux kernel, which allows
|
||||
//! processes to restrict access to the system call API.
|
||||
//!
|
||||
//! Seccomp started life with a single "strict" mode, which only allowed calls
|
||||
//! to read(2), write(2), _exit(2) and sigreturn(2). It turns out that this
|
||||
//! isn't that useful for general-purpose applications, and so a mode that
|
||||
//! utilizes user-supplied filters was added.
|
||||
//!
|
||||
//! Seccomp filters are classic BPF programs, which means that all the
|
||||
//! information under `std.x.net.bpf` applies here as well. Conceptually, a
|
||||
//! seccomp program is attached to the kernel and is executed on each syscall.
|
||||
//! The "packet" being validated is the `data` structure, and the verdict is an
|
||||
//! action that the kernel performs on the calling process. The actions are
|
||||
//! variations on a "pass" or "fail" result, where a pass allows the syscall to
|
||||
//! continue and a fail blocks the syscall and returns some sort of error value.
|
||||
//! See the full list of actions under ::RET for more information. Finally, only
|
||||
//! word-sized, absolute loads (`ld [k]`) are supported to read from the `data`
|
||||
//! structure.
|
||||
//!
|
||||
//! There are some issues with the filter API that have traditionally made
|
||||
//! writing them a pain:
|
||||
//!
|
||||
//! 1. Each CPU architecture supported by Linux has its own unique ABI and
|
||||
//! syscall API. It is not guaranteed that the syscall numbers and arguments
|
||||
//! are the same across architectures, or that they're even implemented. Thus,
|
||||
//! filters cannot be assumed to be portable without consulting documentation
|
||||
//! like syscalls(2) and testing on target hardware. This also requires
|
||||
//! checking the value of `data.arch` to make sure that a filter was compiled
|
||||
//! for the correct architecture.
|
||||
//! 2. Many syscalls take an `unsigned long` or `size_t` argument, the size of
|
||||
//! which is dependent on the ABI. Since BPF programs execute in a 32-bit
|
||||
//! machine, validation of 64-bit arguments necessitates two load-and-compare
|
||||
//! instructions for the upper and lower words.
|
||||
//! 3. A further wrinkle to the above is endianness. Unlike network packets,
|
||||
//! syscall data shares the endianness of the target machine. A filter
|
||||
//! compiled on a little-endian machine will not work on a big-endian one,
|
||||
//! and vice-versa. For example: Checking the upper 32-bits of `data.arg1`
|
||||
//! requires a load at `@offsetOf(data, "arg1") + 4` on big-endian systems
|
||||
//! and `@offsetOf(data, "arg1")` on little-endian systems. Endian-portable
|
||||
//! filters require adjusting these offsets at compile time, similar to how
|
||||
//! e.g. OpenSSH does[1].
|
||||
//! 4. Syscalls with userspace implementations via the vDSO cannot be traced or
|
||||
//! filtered. The vDSO can be disabled or just ignored, which must be taken
|
||||
//! into account when writing filters.
|
||||
//! 5. Software libraries - especially dynamically loaded ones - tend to use
|
||||
//! more of the syscall API over time, thus filters must evolve with them.
|
||||
//! Static filters can result in reduced or even broken functionality when
|
||||
//! calling newer code from these libraries. This is known to happen with
|
||||
//! critical libraries like glibc[2].
|
||||
//!
|
||||
//! Some of these issues can be mitigated with help from Zig and the standard
|
||||
//! library. Since the target CPU is known at compile time, the proper syscall
|
||||
//! numbers are mixed into the `os` namespace under `std.os.SYS` (see the code
|
||||
//! for `arch_bits` in `os/linux.zig`). Referencing an unimplemented syscall
|
||||
//! would be a compile error. Endian offsets can also be defined in a similar
|
||||
//! manner to the OpenSSH example:
|
||||
//!
|
||||
//! ```zig
|
||||
//! const offset = if (native_endian == .Little) struct {
|
||||
//! pub const low = 0;
|
||||
//! pub const high = @sizeOf(u32);
|
||||
//! } else struct {
|
||||
//! pub const low = @sizeOf(u32);
|
||||
//! pub const high = 0;
|
||||
//! };
|
||||
//! ```
|
||||
//!
|
||||
//! Unfortunately, there is no easy solution for issue 5. The most reliable
|
||||
//! strategy is to keep testing; test newer Zig versions, different libcs,
|
||||
//! different distros, and design your filter to accommodate all of them.
|
||||
//! Alternatively, you could inject a filter at runtime. Since filters are
|
||||
//! preserved across execve(2), a filter could be setup before executing your
|
||||
//! program, without your program having any knowledge of this happening. This
|
||||
//! is the method used by systemd[3] and Cloudflare's sandbox library[4].
|
||||
//!
|
||||
//! [1]: https://github.com/openssh/openssh-portable/blob/master/sandbox-seccomp-filter.c#L81
|
||||
//! [2]: https://sourceware.org/legacy-ml/libc-alpha/2017-11/msg00246.html
|
||||
//! [3]: https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=
|
||||
//! [4]: https://github.com/cloudflare/sandbox
|
||||
//!
|
||||
//! See Also
|
||||
//! - seccomp(2), seccomp_unotify(2)
|
||||
//! - https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html
|
||||
const IOCTL = @import("ioctl.zig");
|
||||
|
||||
/// Modes for the prctl(2) form `prctl(PR_SET_SECCOMP, mode)`.
pub const MODE = struct {
    /// Seccomp not in use.
    pub const DISABLED = 0;
    /// Uses a hard-coded filter.
    pub const STRICT = 1;
    /// Uses a user-supplied filter.
    pub const FILTER = 2;
};
|
||||
|
||||
// Operations for the seccomp(2) form `seccomp(operation, flags, args)`

/// Operation number 0: select the strict mode. See seccomp(2).
pub const SET_MODE_STRICT = 0;
/// Operation number 1: select the filter mode. `FILTER_FLAG` holds the
/// bitflags for this operation.
pub const SET_MODE_FILTER = 1;
/// Operation number 2. See seccomp(2).
pub const GET_ACTION_AVAIL = 2;
/// Operation number 3: used together with `notif_sizes`.
pub const GET_NOTIF_SIZES = 3;
|
||||
|
||||
/// Bitflags for the SET_MODE_FILTER operation.
/// Each constant is a single bit; combine them with bitwise OR. See
/// seccomp(2) for the meaning of each flag.
pub const FILTER_FLAG = struct {
    pub const TSYNC = 1 << 0;
    pub const LOG = 1 << 1;
    pub const SPEC_ALLOW = 1 << 2;
    pub const NEW_LISTENER = 1 << 3;
    pub const TSYNC_ESRCH = 1 << 4;
    /// Added to the kernel after the flags above (Linux 5.19); see
    /// `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` in seccomp(2).
    pub const WAIT_KILLABLE_RECV = 1 << 5;
};
|
||||
|
||||
/// Action values for seccomp BPF programs.
/// The lower 16-bits are for optional return data.
/// The upper 16-bits are ordered from least permissive values to most.
pub const RET = struct {
    /// Kill the process.
    pub const KILL_PROCESS = 0x80000000;
    /// Kill the thread.
    pub const KILL_THREAD = 0x00000000;
    /// Alias of `KILL_THREAD`.
    pub const KILL = KILL_THREAD;
    /// Disallow and force a SIGSYS.
    pub const TRAP = 0x00030000;
    /// Return an errno.
    pub const ERRNO = 0x00050000;
    /// Forward the syscall to a userspace supervisor to make a decision.
    pub const USER_NOTIF = 0x7fc00000;
    /// Pass to a tracer or disallow.
    pub const TRACE = 0x7ff00000;
    /// Allow after logging.
    pub const LOG = 0x7ffc0000;
    /// Allow.
    pub const ALLOW = 0x7fff0000;

    // Masks for the return value sections.

    /// Selects the whole upper 16 bits of a return value.
    pub const ACTION_FULL = 0xffff0000;
    /// Selects the upper 16 bits of a return value except the topmost bit.
    pub const ACTION = 0x7fff0000;
    /// Selects the lower 16 bits (the optional return data).
    pub const DATA = 0x0000ffff;
};
|
||||
|
||||
/// ioctl request codes for seccomp notifications. All of them are built with
/// the `IOCTL` helpers using `'!'` as the type character; the last argument
/// is the payload type of the request. See seccomp_unotify(2).
pub const IOCTL_NOTIF = struct {
    /// Request 0; payload is a `notif`.
    pub const RECV = IOCTL.IOWR('!', 0, notif);
    /// Request 1; payload is a `notif_resp`.
    pub const SEND = IOCTL.IOWR('!', 1, notif_resp);
    /// Request 2; payload is a `u64`.
    pub const ID_VALID = IOCTL.IOW('!', 2, u64);
    /// Request 3; payload is a `notif_addfd`.
    pub const ADDFD = IOCTL.IOW('!', 3, notif_addfd);
};
|
||||
|
||||
/// Tells the kernel that the supervisor allows the syscall to continue.
/// Set in `notif_resp.flags`.
pub const USER_NOTIF_FLAG_CONTINUE = 1 << 0;
|
||||
|
||||
/// Single-bit flags; presumably meant for `notif_addfd.flags` (confirm).
/// See seccomp_unotify(2).
pub const ADDFD_FLAG = struct {
    pub const SETFD = 1 << 0;
    pub const SEND = 1 << 1;
};
|
||||
|
||||
/// The structure a seccomp filter program inspects for each system call.
/// Declared `extern` so that the field layout follows the C ABI.
pub const data = extern struct {
    /// The system call number.
    nr: c_int,
    /// The CPU architecture/system call convention.
    /// One of the values defined in `std.os.linux.AUDIT`.
    arch: u32,
    /// NOTE(review): presumably the instruction pointer at the time of the
    /// system call; confirm against seccomp(2).
    instruction_pointer: u64,
    // The system call arguments. Each is stored as 64 bits.
    arg0: u64,
    arg1: u64,
    arg2: u64,
    arg3: u64,
    arg4: u64,
    arg5: u64,
};
|
||||
|
||||
/// Used with the ::GET_NOTIF_SIZES command to check if the kernel structures
/// have changed.
pub const notif_sizes = extern struct {
    /// Size of ::notif.
    notif: u16,
    /// Size of ::notif_resp.
    notif_resp: u16,
    /// Size of ::data.
    data: u16,
};
|
||||
|
||||
/// Payload type of the `IOCTL_NOTIF.RECV` request: one notification describing
/// a system call. See seccomp_unotify(2).
pub const notif = extern struct {
    /// Unique notification cookie for each filter.
    id: u64,
    /// ID of the thread that triggered the notification.
    pid: u32,
    /// Bitmask for event information. Currently set to zero.
    flags: u32,
    /// The current system call data.
    data: data,
};
|
||||
|
||||
/// The decision payload the supervisor process sends to the kernel.
/// This is the payload type of the `IOCTL_NOTIF.SEND` request.
pub const notif_resp = extern struct {
    /// The filter cookie.
    id: u64,
    /// The return value for a spoofed syscall.
    val: i64,
    /// Set to zero for a spoofed success or a negative error number for a
    /// failure.
    @"error": i32,
    /// Bitmask containing the decision. Either USER_NOTIF_FLAG_CONTINUE to
    /// allow the syscall or zero to spoof the return values.
    flags: u32,
};
|
||||
|
||||
/// Payload type of the `IOCTL_NOTIF.ADDFD` request. See seccomp_unotify(2)
/// for the authoritative description of each field.
pub const notif_addfd = extern struct {
    /// NOTE(review): presumably the notification cookie (`notif.id`); confirm.
    id: u64,
    /// NOTE(review): presumably a combination of `ADDFD_FLAG` bits; confirm.
    flags: u32,
    /// NOTE(review): presumably a file descriptor in the supervisor; confirm.
    srcfd: u32,
    /// NOTE(review): presumably the descriptor number to use in the target;
    /// confirm.
    newfd: u32,
    /// NOTE(review): presumably flags applied to the new descriptor; confirm.
    newfd_flags: u32,
};
|
||||
@ -963,7 +963,7 @@ pub const Target = struct {
|
||||
.amdgcn => ._NONE,
|
||||
.bpfel => ._BPF,
|
||||
.bpfeb => ._BPF,
|
||||
.csky => ._NONE,
|
||||
.csky => ._CSKY,
|
||||
.sparcv9 => ._SPARCV9,
|
||||
.s390x => ._S390,
|
||||
.ve => ._NONE,
|
||||
|
||||
@ -9,6 +9,7 @@ pub const os = struct {
|
||||
pub const net = struct {
|
||||
pub const ip = @import("x/net/ip.zig");
|
||||
pub const tcp = @import("x/net/tcp.zig");
|
||||
pub const bpf = @import("x/net/bpf.zig");
|
||||
};
|
||||
|
||||
test {
|
||||
|
||||
1008
lib/std/x/net/bpf.zig
Normal file
1008
lib/std/x/net/bpf.zig
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user