// Mirror of https://github.com/ziglang/zig.git (synced 2025-12-06 22:33:08 +00:00); 610 lines, 19 KiB, Zig.
//! This file implements the two TLS variants [1] used by ELF-based systems. Note that, in reality,
//! Variant I has two sub-variants.
//!
//! It is important to understand that the term TCB (Thread Control Block) is overloaded here.
//! Official ABI documentation uses it simply to mean the ABI TCB, i.e. a small area of ABI-defined
//! data, usually one or two words (see the `AbiTcb` type below). People will also often use TCB to
//! refer to the libc TCB, which can be any size and contain anything. (One could even omit it!) We
//! refer to the latter as the Zig TCB; see the `ZigTcb` type below.
//!
//! [1] https://www.akkadia.org/drepper/tls.pdf
|
|
|
|
const std = @import("std");
|
|
const mem = std.mem;
|
|
const elf = std.elf;
|
|
const math = std.math;
|
|
const assert = std.debug.assert;
|
|
const native_arch = @import("builtin").cpu.arch;
|
|
const linux = std.os.linux;
|
|
const page_size_min = std.heap.page_size_min;
|
|
|
|
/// The ELF TLS layout variants this file knows how to build.
///
/// Regardless of variant, both the TP and the TLS blocks have to honor the `p_align` value of the
/// `PT_TLS` ELF program header; every other component only needs its natural alignment.
///
/// Nothing in the ABI dictates where the DTV lives. We store it inside the TLS area purely
/// because that is the simplest thing to do.
const Variant = enum {
    /// The original Variant I:
    ///
    /// ----------------------------------------
    /// | DTV | Zig TCB | ABI TCB | TLS Blocks |
    /// ----------------^-----------------------
    ///                 `-- The TP register points here.
    ///
    /// With this layout the TP and the TLS blocks each have to be aligned separately.
    ///
    /// The first ABI TCB word holds the DTV pointer. Some architectures have a second word whose
    /// meaning is unspecified.
    I_original,
    /// The modified Variant I:
    ///
    /// ---------------------------------------------------
    /// | DTV | Zig TCB | ABI TCB | [Offset] | TLS Blocks |
    /// -------------------------------------^-------------
    ///                                      `-- The TP register points here.
    ///
    /// The offset (possibly zero) only biases the TP value; the ABI TCB and the TLS blocks are
    /// always physically adjacent. Consequently only the TP needs to be aligned.
    ///
    /// The ABI TCB consists of a single word, which holds the DTV pointer.
    I_modified,
    /// Variant II:
    ///
    /// ----------------------------------------
    /// | TLS Blocks | ABI TCB | Zig TCB | DTV |
    /// -------------^--------------------------
    ///              `-- The TP register points here.
    ///
    /// The ABI TCB consists of a single word, which points back at the ABI TCB itself.
    II,
};
|
|
|
|
/// The TLS variant used by the architecture being compiled for. Architectures that are not listed
/// here are rejected at compile time.
const current_variant: Variant = switch (native_arch) {
    // Original Variant I.
    .aarch64, .aarch64_be => .I_original,
    .alpha, .csky, .hppa => .I_original,
    .arc, .arceb => .I_original,
    .arm, .armeb, .thumb, .thumbeb => .I_original,
    .microblaze, .microblazeel => .I_original,
    .sh, .sheb => .I_original,

    // Modified Variant I.
    .loongarch32, .loongarch64 => .I_modified,
    .m68k, .or1k => .I_modified,
    .mips, .mipsel, .mips64, .mips64el => .I_modified,
    .powerpc, .powerpcle, .powerpc64, .powerpc64le => .I_modified,
    .riscv32, .riscv64 => .I_modified,

    // Variant II.
    .hexagon, .s390x, .sparc, .sparc64, .x86, .x86_64 => .II,

    else => @compileError("undefined TLS variant for this architecture"),
};
|
|
|
|
/// The `[Offset]` of the modified Variant I for the current architecture. `prepareArea` adds it to
/// the address of the TLS block when forming the TP value; it is zero wherever no bias applies.
const current_tp_offset = switch (native_arch) {
    .m68k => 0x7000,
    .mips, .mipsel, .mips64, .mips64el => 0x7000,
    .powerpc, .powerpcle, .powerpc64, .powerpc64le => 0x7000,
    else => 0,
};
|
|
|
|
/// Bias that `prepareArea` adds to the TLS block pointer it stores in the DTV. It is non-zero only
/// on some architectures, usually ones using the modified Variant I.
const current_dtv_offset = switch (native_arch) {
    .m68k => 0x8000,
    .mips, .mipsel, .mips64, .mips64el => 0x8000,
    .powerpc, .powerpcle, .powerpc64, .powerpc64le => 0x8000,
    .riscv32, .riscv64 => 0x800,
    else => 0,
};
|
|
|
|
/// Per-thread storage for the ELF TLS ABI: the one or two ABI-defined words located at (or, for
/// the modified Variant I, immediately before) the position the TP refers to.
const AbiTcb = switch (current_variant) {
    .I_original, .I_modified => switch (native_arch) {
        .aarch64,
        .aarch64_be,
        .alpha,
        .arm,
        .armeb,
        .hppa,
        .microblaze,
        .microblazeel,
        .sh,
        .sheb,
        .thumb,
        .thumbeb,
        => extern struct {
            /// Address of the DTV, as an integer. `prepareArea` stores the DTV's address here
            /// without any bias; `current_dtv_offset` is applied to the block pointer held
            /// inside the DTV instead.
            dtv: usize,
            /// Second ABI word. Nothing here writes it beyond the zero fill done by `prepareArea`.
            _reserved: ?*anyopaque,
        },
        else => extern struct {
            /// Address of the DTV, as an integer. `prepareArea` stores the DTV's address here
            /// without any bias; `current_dtv_offset` is applied to the block pointer held
            /// inside the DTV instead.
            dtv: usize,
        },
    },
    .II => extern struct {
        /// This is self-referential: `prepareArea` points it at the ABI TCB itself.
        self: *AbiTcb,
    },
};
|
|
|
|
/// Zig's own per-thread control block (the "libc TCB" in other implementations). Currently
/// unused; it only contains a placeholder word.
const ZigTcb = struct {
    dummy: usize,
};
|
|
|
|
/// The Dynamic Thread Vector described by the ELF TLS ABI. In general it holds one block pointer
/// for every dynamically-loaded module; because only static TLS is supported here, a single
/// block pointer is enough.
const Dtv = extern struct {
    /// Number of block pointers that follow; always 1 here.
    len: usize = 1,
    /// Pointer to the (only) TLS block. `prepareArea` stores the block address plus
    /// `current_dtv_offset`.
    tls_block: [*]u8,
};
|
|
|
|
/// Describes a process's TLS area. The area encompasses the DTV, both TCBs, and the TLS block, with
/// the exact layout of these being dependent primarily on `current_variant`.
const AreaDesc = struct {
    /// Total number of bytes occupied by the area.
    size: usize,
    /// Alignment required for the start of the area.
    alignment: usize,

    dtv: struct {
        /// Offset into the TLS area.
        offset: usize,
    },

    abi_tcb: struct {
        /// Offset into the TLS area.
        offset: usize,
    },

    block: struct {
        /// The initial data to be copied into the TLS block. Note that this may be smaller than
        /// `size`, in which case the remainder of the TLS block stays zero-filled: `prepareArea`
        /// clears the whole area before copying this data in.
        init: []const u8,
        /// Offset into the TLS area.
        offset: usize,
        /// This is the effective size of the TLS block, which may be greater than `init.len`.
        size: usize,
    },

    /// Only used on the 32-bit x86 architecture (not x86_64, nor x32).
    gdt_entry_number: usize,
};
|
|
|
|
/// Layout of the static TLS area. It starts out undefined: `computeAreaDesc` assigns it, and on
/// 32-bit x86 `setThreadPointer` later updates its `gdt_entry_number`.
pub var area_desc: AreaDesc = undefined;
|
|
|
|
/// Makes `addr` the thread pointer of the calling thread, using whatever mechanism the target
/// architecture requires: a plain register write, a dedicated syscall, or both.
///
/// Syscall wrappers are force-inlined and their results are only checked with `assert`. On 32-bit
/// x86 the GDT slot handed out by the kernel is recorded in `area_desc.gdt_entry_number`.
pub fn setThreadPointer(addr: usize) void {
    @setRuntimeSafety(false);
    @disableInstrumentation();

    switch (native_arch) {
        .x86 => {
            var user_desc: linux.user_desc = .{
                .entry_number = area_desc.gdt_entry_number,
                .base_addr = addr,
                .limit = 0xfffff,
                .flags = .{
                    .seg_32bit = 1,
                    .contents = 0, // Data
                    .read_exec_only = 0,
                    .limit_in_pages = 1,
                    .seg_not_present = 0,
                    .useable = 1,
                },
            };
            const rc = @call(.always_inline, linux.syscall1, .{ .set_thread_area, @intFromPtr(&user_desc) });
            assert(rc == 0);

            const gdt_entry_number = user_desc.entry_number;
            // We have to keep track of our slot as it's also needed for clone()
            area_desc.gdt_entry_number = gdt_entry_number;
            // Update the %gs selector
            asm volatile ("movl %[gs_val], %%gs"
                :
                : [gs_val] "r" (gdt_entry_number << 3 | 3),
            );
        },
        .x86_64 => {
            const rc = @call(.always_inline, linux.syscall2, .{ .arch_prctl, linux.ARCH.SET_FS, addr });
            assert(rc == 0);
        },
        .aarch64, .aarch64_be => {
            asm volatile (
                \\ msr tpidr_el0, %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .alpha => {
            asm volatile (
                \\ lda a0, %[addr]
                \\ wruniq
                :
                : [addr] "r" (addr),
            );
        },
        .arc, .arceb => {
            // We apparently need to both set r25 (TP) *and* inform the kernel...
            asm volatile (
                \\ mov r25, %[addr]
                :
                : [addr] "r" (addr),
            );
            const rc = @call(.always_inline, linux.syscall1, .{ .arc_settls, addr });
            assert(rc == 0);
        },
        .arm, .armeb, .thumb, .thumbeb => {
            const rc = @call(.always_inline, linux.syscall1, .{ .set_tls, addr });
            assert(rc == 0);
        },
        .m68k => {
            // Force-inline the syscall wrapper, exactly like every other syscall made here.
            const rc = @call(.always_inline, linux.syscall1, .{ .set_thread_area, addr });
            assert(rc == 0);
        },
        .hexagon => {
            asm volatile (
                \\ ugp = %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .hppa => {
            // NOTE(review): the operand below sits in the input list, yet its constraint carries
            // the output-style `=` modifier -- confirm that this is intended.
            asm volatile (
                \\ ble 0xe0(%%sr2, %%r0)
                :
                : [addr] "={r26}" (addr),
                : .{ .r29 = true });
        },
        .loongarch32, .loongarch64 => {
            asm volatile (
                \\ move $tp, %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .riscv32, .riscv64 => {
            asm volatile (
                \\ mv tp, %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .csky, .mips, .mipsel, .mips64, .mips64el => {
            const rc = @call(.always_inline, linux.syscall1, .{ .set_thread_area, addr });
            assert(rc == 0);
        },
        .microblaze, .microblazeel => {
            asm volatile (
                \\ ori r21, %[addr], 0
                :
                : [addr] "r" (addr),
            );
        },
        .or1k => {
            asm volatile (
                \\ l.ori r10, %[addr], 0
                :
                : [addr] "r" (addr),
            );
        },
        .powerpc, .powerpcle => {
            asm volatile (
                \\ mr 2, %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .powerpc64, .powerpc64le => {
            asm volatile (
                \\ mr 13, %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .s390x => {
            // The 64-bit TP is split across the two 32-bit access registers a0 (high) and a1 (low).
            asm volatile (
                \\ lgr %%r0, %[addr]
                \\ sar %%a1, %%r0
                \\ srlg %%r0, %%r0, 32
                \\ sar %%a0, %%r0
                :
                : [addr] "r" (addr),
                : .{ .r0 = true });
        },
        .sh, .sheb => {
            asm volatile (
                \\ ldc gbr, %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .sparc, .sparc64 => {
            asm volatile (
                \\ mov %[addr], %%g7
                :
                : [addr] "r" (addr),
            );
        },
        else => @compileError("Unsupported architecture"),
    }
}
|
|
|
|
/// Computes the layout of the static TLS area for `current_variant` and stores it in `area_desc`.
///
/// `phdrs` is scanned for `PT_PHDR` (used to derive the image's load bias) and `PT_TLS` (which
/// supplies the alignment, the initial data, and the size of the TLS block). Without a `PT_TLS`
/// header the TLS block is empty, but the DTV and both TCBs are laid out all the same.
fn computeAreaDesc(phdrs: []elf.Phdr) void {
    @setRuntimeSafety(false);
    @disableInstrumentation();

    var tls_phdr: ?*elf.Phdr = null;
    var img_base: usize = 0;

    for (phdrs) |*phdr| {
        switch (phdr.p_type) {
            elf.PT_PHDR => img_base = @intFromPtr(phdrs.ptr) - phdr.p_vaddr,
            elf.PT_TLS => tls_phdr = phdr,
            else => {},
        }
    }

    var align_factor: usize = undefined;
    var block_init: []const u8 = undefined;
    var block_size: usize = undefined;

    if (tls_phdr) |phdr| {
        // ELF gives `p_align` values of 0 and 1 the same meaning (no alignment required).
        // Normalize 0 to 1 so that the `align_factor - 1` masks used below remain valid.
        align_factor = if (phdr.p_align == 0) 1 else phdr.p_align;

        // The effective size in memory is represented by `p_memsz`; the length of the data stored
        // in the `PT_TLS` segment is `p_filesz` and may be less than the former.
        block_init = @as([*]u8, @ptrFromInt(img_base + phdr.p_vaddr))[0..phdr.p_filesz];
        block_size = phdr.p_memsz;
    } else {
        align_factor = @alignOf(usize);

        block_init = &[_]u8{};
        block_size = 0;
    }

    // Offsets into the allocated TLS area.
    var dtv_offset: usize = undefined;
    var abi_tcb_offset: usize = undefined;
    var block_offset: usize = undefined;

    // Compute the total size of the ABI-specific data plus our own `ZigTcb` structure. All the
    // offsets calculated here assume a well-aligned base address.
    const area_size = switch (current_variant) {
        .I_original => blk: {
            var l: usize = 0;
            dtv_offset = l;
            l += @sizeOf(Dtv);
            // Add some padding here so that the TP (`abi_tcb_offset`) is aligned to `align_factor`
            // and the `ZigTcb` structure can be found by simply subtracting `@sizeOf(ZigTcb)` from
            // the TP.
            const delta = (l + @sizeOf(ZigTcb)) & (align_factor - 1);
            if (delta > 0)
                l += align_factor - delta;
            l += @sizeOf(ZigTcb);
            abi_tcb_offset = l;
            l += alignForward(@sizeOf(AbiTcb), align_factor);
            block_offset = l;
            l += block_size;
            break :blk l;
        },
        .I_modified => blk: {
            var l: usize = 0;
            dtv_offset = l;
            l += @sizeOf(Dtv);
            // In this variant, the TLS blocks must begin immediately after the end of the ABI TCB,
            // with the TP pointing to the beginning of the TLS blocks. Add padding so that the TP
            // (`abi_tcb_offset`) is aligned to `align_factor` and the `ZigTcb` structure can be
            // found by subtracting `@sizeOf(AbiTcb) + @sizeOf(ZigTcb)` from the TP.
            const delta = (l + @sizeOf(ZigTcb) + @sizeOf(AbiTcb)) & (align_factor - 1);
            if (delta > 0)
                l += align_factor - delta;
            l += @sizeOf(ZigTcb);
            abi_tcb_offset = l;
            l += @sizeOf(AbiTcb);
            block_offset = l;
            l += block_size;
            break :blk l;
        },
        .II => blk: {
            var l: usize = 0;
            block_offset = l;
            l += alignForward(block_size, align_factor);
            // The TP is aligned to `align_factor`.
            abi_tcb_offset = l;
            l += @sizeOf(AbiTcb);
            // The `ZigTcb` structure is right after the `AbiTcb` with no padding in between so it
            // can be easily found.
            l += @sizeOf(ZigTcb);
            // It doesn't really matter where we put the DTV, so give it natural alignment.
            l = alignForward(l, @alignOf(Dtv));
            dtv_offset = l;
            l += @sizeOf(Dtv);
            break :blk l;
        },
    };

    area_desc = .{
        .size = area_size,
        .alignment = align_factor,

        .dtv = .{
            .offset = dtv_offset,
        },

        .abi_tcb = .{
            .offset = abi_tcb_offset,
        },

        .block = .{
            .init = block_init,
            .offset = block_offset,
            .size = block_size,
        },

        // All bits set (-1) until `setThreadPointer` records the real slot on 32-bit x86.
        .gdt_entry_number = @as(usize, @bitCast(@as(isize, -1))),
    };
}
|
|
|
|
/// Rounds `addr` up to the next multiple of `alignment`; the mask arithmetic assumes `alignment`
/// is a power of two.
///
/// Inline because TLS is not set up yet.
inline fn alignForward(addr: usize, alignment: usize) usize {
    const bumped = addr + (alignment - 1);
    return alignBackward(bumped, alignment);
}
|
|
|
|
/// Rounds `addr` down to a multiple of `alignment` by clearing its low bits; the mask arithmetic
/// assumes `alignment` is a power of two.
///
/// Inline because TLS is not set up yet.
inline fn alignBackward(addr: usize, alignment: usize) usize {
    const low_bits = alignment - 1;
    return addr & ~low_bits;
}
|
|
|
|
/// Reinterprets the byte pointer `ptr` as a `*T`, asserting that it already satisfies the
/// alignment of `T`.
///
/// Inline because TLS is not set up yet.
inline fn alignPtrCast(comptime T: type, ptr: [*]u8) *T {
    const aligned: [*]align(@alignOf(T)) u8 = @alignCast(ptr);
    return @ptrCast(aligned);
}
|
|
|
|
/// Fills in a static TLS area according to `area_desc`: zeroes it, writes the DTV and the ABI TCB,
/// and copies in the initial TLS data. Returns the value that must be loaded into the TP register
/// for this area on the current architecture.
///
/// NOTE(review): assumes `area` is at least `area_desc.size` bytes long and aligned to
/// `area_desc.alignment`, as `initStatic` arranges -- confirm for other callers.
pub fn prepareArea(area: []u8) usize {
    @setRuntimeSafety(false);
    @disableInstrumentation();

    const base = area.ptr;
    const dtv_bytes = base + area_desc.dtv.offset;

    // Start from an all-zero area, just to be safe.
    @memset(area, 0);

    // DTV: a single entry referring to the (biased) TLS block.
    const dtv = alignPtrCast(Dtv, dtv_bytes);
    dtv.len = 1;
    dtv.tls_block = base + current_dtv_offset + area_desc.block.offset;

    // ABI TCB: either a pointer to itself or the address of the DTV, depending on the variant.
    const abi_tcb = alignPtrCast(AbiTcb, base + area_desc.abi_tcb.offset);
    switch (current_variant) {
        .II => abi_tcb.self = abi_tcb,
        .I_original, .I_modified => abi_tcb.dtv = @intFromPtr(dtv_bytes),
    }

    // Initial TLS image.
    const init_data = area_desc.block.init;
    const block_head = area[area_desc.block.offset..][0..init_data.len];
    @memcpy(block_head, init_data);

    // The TP is the area's base plus a variant-specific offset. Wrapping arithmetic is used on
    // purpose: overflow is not a problem here because pointer arithmetic involving the TP is
    // done with wrapping semantics.
    const tp_offset = switch (current_variant) {
        .I_modified => area_desc.block.offset +% current_tp_offset,
        .I_original, .II => area_desc.abi_tcb.offset,
    };
    return @intFromPtr(base) +% tp_offset;
}
|
|
|
|
/// Static backing storage for the main thread's TLS area. `initStatic` uses it whenever the
/// computed area fits, so that the common case needs no `mmap` call.
///
/// The size was chosen to match what the thread-local state of the `std.crypto.random`
/// implementation ends up requesting. That is far more than the 64 bytes the struct itself
/// occupies; the suspected (unconfirmed) reason is page alignment combined with LLVM or LLD not
/// laying out TLS data in a space-conserving way. At less than 3 pages of memory this is
/// considered fine: reserving the space in the ELF image is equivalent to letting the kernel do
/// the mapping, which avoids the syscall overhead.
var main_thread_area_buffer: [0x2100]u8 align(page_size_min) = undefined;
|
|
|
|
/// Sets up static TLS for the main thread: derives the area layout from `phdrs`, obtains memory
/// for the area, initializes it, and finally loads the resulting value into the TP register.
///
/// The area lives in `main_thread_area_buffer` when it fits; otherwise it is mapped with
/// `mmap_tls`, and a failed mapping traps.
pub fn initStatic(phdrs: []elf.Phdr) void {
    @setRuntimeSafety(false);
    @disableInstrumentation();

    computeAreaDesc(phdrs);

    // Fast path for the common case of very small TLS data: no allocation, just the local buffer.
    const fits_static_buffer = area_desc.alignment <= page_size_min and
        area_desc.size <= main_thread_area_buffer.len;

    const area: []u8 = if (fits_static_buffer)
        main_thread_area_buffer[0..area_desc.size]
    else mapped: {
        // Over-allocate by `alignment - 1` bytes so that an aligned start can always be found.
        const map_len = area_desc.size + area_desc.alignment - 1;
        const begin_addr = mmap_tls(map_len);
        if (@call(.always_inline, linux.errno, .{begin_addr}) != .SUCCESS) @trap();

        const area_ptr: [*]align(page_size_min) u8 = @ptrFromInt(begin_addr);

        // Skip ahead to the first correctly aligned address inside the mapping.
        const start = alignForward(begin_addr, area_desc.alignment) - begin_addr;
        break :mapped area_ptr[start..][0..area_desc.size];
    };

    setThreadPointer(prepareArea(area));
}
|
|
|
|
/// Asks the kernel for a private, anonymous, readable and writable mapping of `length` bytes and
/// returns the raw syscall result, which the caller has to check (as `initStatic` does with
/// `linux.errno`).
///
/// Uses `mmap2` where that syscall exists and `mmap` otherwise. Every syscall wrapper is
/// force-inlined.
inline fn mmap_tls(length: usize) usize {
    const prot = linux.PROT.READ | linux.PROT.WRITE;
    const flags: linux.MAP = .{ .TYPE = .PRIVATE, .ANONYMOUS = true };
    const flag_bits = @as(u32, @bitCast(flags));
    // File descriptor argument: -1 reinterpreted as an unsigned machine word.
    const no_fd = @as(usize, @bitCast(@as(isize, -1)));

    if (@hasField(linux.SYS, "mmap2")) {
        return @call(.always_inline, linux.syscall6, .{
            .mmap2,
            0,
            length,
            prot,
            flag_bits,
            no_fd,
            0,
        });
    }

    if (native_arch == .s390x) {
        // The s390x mmap() syscall existed before Linux supported syscalls with 5+ parameters, so
        // it takes a single pointer to an array of arguments instead.
        const packed_args = [_]usize{
            0,
            length,
            prot,
            flag_bits,
            no_fd,
            0,
        };
        return @call(.always_inline, linux.syscall1, .{
            .mmap,
            @intFromPtr(&packed_args),
        });
    }

    return @call(.always_inline, linux.syscall6, .{
        .mmap,
        0,
        length,
        prot,
        flag_bits,
        no_fd,
        0,
    });
}
|