Merge pull request #20857 from alexrp/tls-porting

`std.os.linux.tls`: Refactor, improve documentation, fix a bug, and port to more architectures
This commit is contained in:
Andrew Kelley 2024-08-01 01:15:17 -07:00 committed by GitHub
commit 91163b44dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 340 additions and 193 deletions

View File

@ -1261,9 +1261,9 @@ const LinuxThreadImpl = struct {
bytes = std.mem.alignForward(usize, bytes, page_size); bytes = std.mem.alignForward(usize, bytes, page_size);
stack_offset = bytes; stack_offset = bytes;
bytes = std.mem.alignForward(usize, bytes, linux.tls.tls_image.alloc_align); bytes = std.mem.alignForward(usize, bytes, linux.tls.area_desc.alignment);
tls_offset = bytes; tls_offset = bytes;
bytes += linux.tls.tls_image.alloc_size; bytes += linux.tls.area_desc.size;
bytes = std.mem.alignForward(usize, bytes, @alignOf(Instance)); bytes = std.mem.alignForward(usize, bytes, @alignOf(Instance));
instance_offset = bytes; instance_offset = bytes;
@ -1304,12 +1304,12 @@ const LinuxThreadImpl = struct {
}; };
// Prepare the TLS segment and prepare a user_desc struct when needed on x86 // Prepare the TLS segment and prepare a user_desc struct when needed on x86
var tls_ptr = linux.tls.prepareTLS(mapped[tls_offset..]); var tls_ptr = linux.tls.prepareArea(mapped[tls_offset..]);
var user_desc: if (target.cpu.arch == .x86) linux.user_desc else void = undefined; var user_desc: if (target.cpu.arch == .x86) linux.user_desc else void = undefined;
if (target.cpu.arch == .x86) { if (target.cpu.arch == .x86) {
defer tls_ptr = @intFromPtr(&user_desc); defer tls_ptr = @intFromPtr(&user_desc);
user_desc = .{ user_desc = .{
.entry_number = linux.tls.tls_image.gdt_entry_number, .entry_number = linux.tls.area_desc.gdt_entry_number,
.base_addr = tls_ptr, .base_addr = tls_ptr,
.limit = 0xfffff, .limit = 0xfffff,
.flags = .{ .flags = .{

View File

@ -1,3 +1,14 @@
//! This file implements the two TLS variants [1] used by ELF-based systems. Note that, in reality,
//! Variant I has two sub-variants.
//!
//! It is important to understand that the term TCB (Thread Control Block) is overloaded here.
//! Official ABI documentation uses it simply to mean the ABI TCB, i.e. a small area of ABI-defined
//! data, usually one or two words (see the `AbiTcb` type below). People will also often use TCB to
//! refer to the libc TCB, which can be any size and contain anything. (One could even omit it!) We
//! refer to the latter as the Zig TCB; see the `ZigTcb` type below.
//!
//! [1] https://www.akkadia.org/drepper/tls.pdf
const std = @import("std"); const std = @import("std");
const mem = std.mem; const mem = std.mem;
const elf = std.elf; const elf = std.elf;
@ -7,56 +18,63 @@ const native_arch = @import("builtin").cpu.arch;
const linux = std.os.linux; const linux = std.os.linux;
const posix = std.posix; const posix = std.posix;
// This file implements the two TLS variants [1] used by ELF-based systems. /// Represents an ELF TLS variant.
// ///
// The variant I has the following layout in memory: /// In all variants, the TP and the TLS blocks must be aligned to the `p_align` value in the
// ------------------------------------------------------- /// `PT_TLS` ELF program header. Everything else has natural alignment.
// | DTV | Zig | DTV | Alignment | TLS | ///
// | storage | thread data | pointer | | block | /// The location of the DTV does not actually matter. For simplicity, we put it in the TLS area, but
// ------------------------^------------------------------ /// there is no actual ABI requirement that it reside there.
// `-- The thread pointer register points here const Variant = enum {
// /// The original Variant I:
// In this case we allocate additional space for our control structure that's ///
// placed _before_ the DTV pointer together with the DTV. /// ----------------------------------------
// /// | DTV | Zig TCB | ABI TCB | TLS Blocks |
// NOTE: Some systems such as power64 or mips use this variant with a twist: the /// ----------------^-----------------------
// alignment is not present and the tp and DTV addresses are offset by a /// `-- The TP register points here.
// constant. ///
// /// The layout in this variant necessitates separate alignment of both the TP and the TLS
// On the other hand the variant II has the following layout in memory: /// blocks.
// --------------------------------------- ///
// | TLS | TCB | Zig | DTV | /// The first word in the ABI TCB points to the DTV. For some architectures, there may be a
// | block | | thread data | storage | /// second word with an unspecified meaning.
// --------^------------------------------ I_original,
// `-- The thread pointer register points here /// The modified Variant I:
// ///
// The structure of the TCB is not defined by the ABI so we reserve enough space /// ---------------------------------------------------
// for a single pointer as some architectures such as x86 and x86_64 need a /// | DTV | Zig TCB | ABI TCB | [Offset] | TLS Blocks |
// pointer to the TCB block itself at the address pointed by the tp. /// -------------------------------------^-------------
// /// `-- The TP register points here.
// In this case the control structure and DTV are placed one after another right ///
// after the TLS block data. /// The offset (which can be zero) is applied to the TP only; there is never a physical gap
// /// between the ABI TCB and the TLS blocks. This implies that we only need to align the TP.
// At the moment the DTV is very simple since we only support static TLS, all we ///
// need is a two word vector to hold the number of entries (1) and the address /// The first (and only) word in the ABI TCB points to the DTV.
// of the first TLS block. I_modified,
// /// Variant II:
// [1] https://www.akkadia.org/drepper/tls.pdf ///
/// ----------------------------------------
const TLSVariant = enum { /// | TLS Blocks | ABI TCB | Zig TCB | DTV |
VariantI, /// -------------^--------------------------
VariantII, /// `-- The TP register points here.
///
/// The first (and only) word in the ABI TCB points to the ABI TCB itself.
II,
}; };
const tls_variant = switch (native_arch) { const current_variant: Variant = switch (native_arch) {
.arc,
.arm, .arm,
.armeb, .armeb,
.thumb,
.thumbeb,
.aarch64, .aarch64,
.aarch64_be, .aarch64_be,
.riscv32, .csky,
.riscv64, .thumb,
.thumbeb,
=> .I_original,
.loongarch32,
.loongarch64,
.m68k,
.mips, .mips,
.mipsel, .mipsel,
.mips64, .mips64,
@ -65,73 +83,130 @@ const tls_variant = switch (native_arch) {
.powerpcle, .powerpcle,
.powerpc64, .powerpc64,
.powerpc64le, .powerpc64le,
=> TLSVariant.VariantI, .riscv32,
.x86_64, .x86, .sparc64 => TLSVariant.VariantII, .riscv64,
else => @compileError("undefined tls_variant for this architecture"), => .I_modified,
.hexagon,
.s390x,
.sparc64,
.x86,
.x86_64,
=> .II,
else => @compileError("undefined TLS variant for this architecture"),
}; };
// Controls how many bytes are reserved for the Thread Control Block /// The Offset value for the modified Variant I.
const tls_tcb_size = switch (native_arch) { const current_tp_offset = switch (native_arch) {
// ARM EABI mandates enough space for two pointers: the first one points to .m68k,
// the DTV while the second one is unspecified but reserved .mips,
.arm, .armeb, .thumb, .thumbeb, .aarch64, .aarch64_be => 2 * @sizeOf(usize), .mipsel,
// One pointer-sized word that points either to the DTV or the TCB itself .mips64,
else => @sizeOf(usize), .mips64el,
}; .powerpc,
.powerpcle,
// Controls if the TP points to the end of the TCB instead of its beginning .powerpc64,
const tls_tp_points_past_tcb = switch (native_arch) { .powerpc64le,
.riscv32, .riscv64, .mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpcle, .powerpc64, .powerpc64le => true, => 0x7000,
else => false,
};
// Some architectures add some offset to the tp and dtv addresses in order to
// make the generated code more efficient
const tls_tp_offset = switch (native_arch) {
.mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpcle, .powerpc64, .powerpc64le => 0x7000,
else => 0, else => 0,
}; };
const tls_dtv_offset = switch (native_arch) { /// Usually only used by the modified Variant I.
.mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpcle, .powerpc64, .powerpc64le => 0x8000, const current_dtv_offset = switch (native_arch) {
.riscv32, .riscv64 => 0x800, .m68k,
.mips,
.mipsel,
.mips64,
.mips64el,
.powerpc,
.powerpcle,
.powerpc64,
.powerpc64le,
=> 0x8000,
.riscv32,
.riscv64,
=> 0x800,
else => 0, else => 0,
}; };
// Per-thread storage for Zig's use /// Per-thread storage for the ELF TLS ABI.
const CustomData = struct { const AbiTcb = switch (current_variant) {
.I_original, .I_modified => switch (native_arch) {
// ARM EABI mandates enough space for two pointers: the first one points to the DTV as
// usual, while the second one is unspecified.
.aarch64,
.aarch64_be,
.arm,
.armeb,
.thumb,
.thumbeb,
=> extern struct {
/// This is offset by `current_dtv_offset`.
dtv: usize,
reserved: ?*anyopaque,
},
else => extern struct {
/// This is offset by `current_dtv_offset`.
dtv: usize,
},
},
.II => extern struct {
/// This is self-referential.
self: *AbiTcb,
},
};
/// Per-thread storage for Zig's use. Currently unused.
const ZigTcb = struct {
dummy: usize, dummy: usize,
}; };
// Dynamic Thread Vector /// Dynamic Thread Vector as specified in the ELF TLS ABI. Ordinarily, there is a block pointer per
const DTV = extern struct { /// dynamically-loaded module, but since we only support static TLS, we only need one block pointer.
entries: usize, const Dtv = extern struct {
tls_block: [1][*]u8, len: usize = 1,
tls_block: [*]u8,
}; };
// Holds all the information about the process TLS image /// Describes a process's TLS area. The area encompasses the DTV, both TCBs, and the TLS block, with
const TLSImage = struct { /// the exact layout of these being dependent primarily on `current_variant`.
init_data: []const u8, const AreaDesc = struct {
alloc_size: usize, size: usize,
alloc_align: usize, alignment: usize,
tcb_offset: usize,
dtv_offset: usize, dtv: struct {
data_offset: usize, /// Offset into the TLS area.
data_size: usize, offset: usize,
// Only used on the x86 architecture },
abi_tcb: struct {
/// Offset into the TLS area.
offset: usize,
},
block: struct {
/// The initial data to be copied into the TLS block. Note that this may be smaller than
/// `size`, in which case the remaining bytes of the TLS block stay zeroed (the whole area is cleared before `init` is copied in).
init: []const u8,
/// Offset into the TLS area.
offset: usize,
/// This is the effective size of the TLS block, which may be greater than `init.len`.
size: usize,
},
/// Only used on the 32-bit x86 architecture (not x86_64, nor x32).
gdt_entry_number: usize, gdt_entry_number: usize,
}; };
pub var tls_image: TLSImage = undefined; pub var area_desc: AreaDesc = undefined;
pub fn setThreadPointer(addr: usize) void { pub fn setThreadPointer(addr: usize) void {
@setRuntimeSafety(false); @setRuntimeSafety(false);
@disableInstrumentation(); @disableInstrumentation();
switch (native_arch) { switch (native_arch) {
.x86 => { .x86 => {
var user_desc: linux.user_desc = .{ var user_desc: linux.user_desc = .{
.entry_number = tls_image.gdt_entry_number, .entry_number = area_desc.gdt_entry_number,
.base_addr = addr, .base_addr = addr,
.limit = 0xfffff, .limit = 0xfffff,
.flags = .{ .flags = .{
@ -148,7 +223,7 @@ pub fn setThreadPointer(addr: usize) void {
const gdt_entry_number = user_desc.entry_number; const gdt_entry_number = user_desc.entry_number;
// We have to keep track of our slot as it's also needed for clone() // We have to keep track of our slot as it's also needed for clone()
tls_image.gdt_entry_number = gdt_entry_number; area_desc.gdt_entry_number = gdt_entry_number;
// Update the %gs selector // Update the %gs selector
asm volatile ("movl %[gs_val], %%gs" asm volatile ("movl %[gs_val], %%gs"
: :
@ -166,10 +241,38 @@ pub fn setThreadPointer(addr: usize) void {
: [addr] "r" (addr), : [addr] "r" (addr),
); );
}, },
.arc => {
// We apparently need to both set r25 (TP) *and* inform the kernel...
asm volatile (
\\ mov r25, %[addr]
:
: [addr] "r" (addr),
);
const rc = @call(.always_inline, linux.syscall1, .{ .arc_settls, addr });
assert(rc == 0);
},
.arm, .armeb, .thumb, .thumbeb => { .arm, .armeb, .thumb, .thumbeb => {
const rc = @call(.always_inline, linux.syscall1, .{ .set_tls, addr }); const rc = @call(.always_inline, linux.syscall1, .{ .set_tls, addr });
assert(rc == 0); assert(rc == 0);
}, },
.m68k => {
const rc = linux.syscall1(.set_thread_area, addr);
assert(rc == 0);
},
.hexagon => {
asm volatile (
\\ ugp = %[addr]
:
: [addr] "r" (addr),
);
},
.loongarch32, .loongarch64 => {
    // LoongArch has no `mv` mnemonic (that spelling is RISC-V). The register-to-register
    // copy is the `move` pseudo-instruction, and register names take a `$` prefix, so the
    // thread pointer register is written as `$tp`.
    asm volatile (
        \\ move $tp, %[addr]
        :
        : [addr] "r" (addr),
    );
},
.riscv32, .riscv64 => { .riscv32, .riscv64 => {
asm volatile ( asm volatile (
\\ mv tp, %[addr] \\ mv tp, %[addr]
@ -177,7 +280,7 @@ pub fn setThreadPointer(addr: usize) void {
: [addr] "r" (addr), : [addr] "r" (addr),
); );
}, },
.mips, .mipsel, .mips64, .mips64el => { .csky, .mips, .mipsel, .mips64, .mips64el => {
const rc = @call(.always_inline, linux.syscall1, .{ .set_thread_area, addr }); const rc = @call(.always_inline, linux.syscall1, .{ .set_thread_area, addr });
assert(rc == 0); assert(rc == 0);
}, },
@ -195,6 +298,17 @@ pub fn setThreadPointer(addr: usize) void {
: [addr] "r" (addr), : [addr] "r" (addr),
); );
}, },
.s390x => {
asm volatile (
\\ lgr %%r0, %[addr]
\\ sar %%a1, %%r0
\\ srlg %%r0, %%r0, 32
\\ sar %%a0, %%r0
:
: [addr] "r" (addr),
: "r0"
);
},
.sparc64 => { .sparc64 => {
asm volatile ( asm volatile (
\\ mov %[addr], %%g7 \\ mov %[addr], %%g7
@ -206,7 +320,7 @@ pub fn setThreadPointer(addr: usize) void {
} }
} }
fn initTLS(phdrs: []elf.Phdr) void { fn computeAreaDesc(phdrs: []elf.Phdr) void {
@setRuntimeSafety(false); @setRuntimeSafety(false);
@disableInstrumentation(); @disableInstrumentation();
@ -221,72 +335,103 @@ fn initTLS(phdrs: []elf.Phdr) void {
} }
} }
var tls_align_factor: usize = undefined; var align_factor: usize = undefined;
var tls_data: []const u8 = undefined; var block_init: []const u8 = undefined;
var tls_data_alloc_size: usize = undefined; var block_size: usize = undefined;
if (tls_phdr) |phdr| { if (tls_phdr) |phdr| {
// The effective size in memory is represented by p_memsz, the length of align_factor = phdr.p_align;
// the data stored in the PT_TLS segment is p_filesz and may be less
// than the former // The effective size in memory is represented by `p_memsz`; the length of the data stored
tls_align_factor = phdr.p_align; // in the `PT_TLS` segment is `p_filesz` and may be less than the former.
tls_data = @as([*]u8, @ptrFromInt(img_base + phdr.p_vaddr))[0..phdr.p_filesz]; block_init = @as([*]u8, @ptrFromInt(img_base + phdr.p_vaddr))[0..phdr.p_filesz];
tls_data_alloc_size = phdr.p_memsz; block_size = phdr.p_memsz;
} else { } else {
tls_align_factor = @alignOf(usize); align_factor = @alignOf(usize);
tls_data = &[_]u8{};
tls_data_alloc_size = 0; block_init = &[_]u8{};
block_size = 0;
} }
// Offsets into the allocated TLS area // Offsets into the allocated TLS area.
var tcb_offset: usize = undefined;
var dtv_offset: usize = undefined; var dtv_offset: usize = undefined;
var data_offset: usize = undefined; var abi_tcb_offset: usize = undefined;
// Compute the total size of the ABI-specific data plus our own control var block_offset: usize = undefined;
// structures. All the offset calculated here assume a well-aligned base
// address. // Compute the total size of the ABI-specific data plus our own `ZigTcb` structure. All the
const alloc_size = switch (tls_variant) { // offsets calculated here assume a well-aligned base address.
.VariantI => blk: { const area_size = switch (current_variant) {
.I_original => blk: {
var l: usize = 0; var l: usize = 0;
dtv_offset = l; dtv_offset = l;
l += @sizeOf(DTV); l += @sizeOf(Dtv);
// Add some padding here so that the thread pointer (tcb_offset) is // Add some padding here so that the TP (`abi_tcb_offset`) is aligned to `align_factor`
// aligned to p_align and the CustomData structure can be found by // and the `ZigTcb` structure can be found by simply subtracting `@sizeOf(ZigTcb)` from
// simply subtracting its @sizeOf from the tp value // the TP.
const delta = (l + @sizeOf(CustomData)) & (tls_align_factor - 1); const delta = (l + @sizeOf(ZigTcb)) & (align_factor - 1);
if (delta > 0) if (delta > 0)
l += tls_align_factor - delta; l += align_factor - delta;
l += @sizeOf(CustomData); l += @sizeOf(ZigTcb);
tcb_offset = l; abi_tcb_offset = l;
l += alignForward(tls_tcb_size, tls_align_factor); l += alignForward(@sizeOf(AbiTcb), align_factor);
data_offset = l; block_offset = l;
l += tls_data_alloc_size; l += block_size;
break :blk l; break :blk l;
}, },
.VariantII => blk: { .I_modified => blk: {
var l: usize = 0; var l: usize = 0;
data_offset = l;
l += alignForward(tls_data_alloc_size, tls_align_factor);
// The thread pointer is aligned to p_align
tcb_offset = l;
l += tls_tcb_size;
// The CustomData structure is right after the TCB with no padding
// in between so it can be easily found
l += @sizeOf(CustomData);
l = alignForward(l, @alignOf(DTV));
dtv_offset = l; dtv_offset = l;
l += @sizeOf(DTV); l += @sizeOf(Dtv);
// In this variant, the TLS blocks must begin immediately after the end of the ABI TCB,
// with the TP pointing to the beginning of the TLS blocks. Add padding so that the TP
// (`abi_tcb_offset`) is aligned to `align_factor` and the `ZigTcb` structure can be
// found by subtracting `@sizeOf(AbiTcb) + @sizeOf(ZigTcb)` from the TP.
const delta = (l + @sizeOf(ZigTcb) + @sizeOf(AbiTcb)) & (align_factor - 1);
if (delta > 0)
l += align_factor - delta;
l += @sizeOf(ZigTcb);
abi_tcb_offset = l;
l += @sizeOf(AbiTcb);
block_offset = l;
l += block_size;
break :blk l;
},
.II => blk: {
var l: usize = 0;
block_offset = l;
l += alignForward(block_size, align_factor);
// The TP is aligned to `align_factor`.
abi_tcb_offset = l;
l += @sizeOf(AbiTcb);
// The `ZigTcb` structure is right after the `AbiTcb` with no padding in between so it
// can be easily found.
l += @sizeOf(ZigTcb);
// It doesn't really matter where we put the DTV, so give it natural alignment.
l = alignForward(l, @alignOf(Dtv));
dtv_offset = l;
l += @sizeOf(Dtv);
break :blk l; break :blk l;
}, },
}; };
tls_image = TLSImage{ area_desc = .{
.init_data = tls_data, .size = area_size,
.alloc_size = alloc_size, .alignment = align_factor,
.alloc_align = tls_align_factor,
.tcb_offset = tcb_offset, .dtv = .{
.dtv_offset = dtv_offset, .offset = dtv_offset,
.data_offset = data_offset, },
.data_size = tls_data_alloc_size,
.abi_tcb = .{
.offset = abi_tcb_offset,
},
.block = .{
.init = block_init,
.offset = block_offset,
.size = block_size,
},
.gdt_entry_number = @as(usize, @bitCast(@as(isize, -1))), .gdt_entry_number = @as(usize, @bitCast(@as(isize, -1))),
}; };
} }
@ -306,78 +451,80 @@ inline fn alignPtrCast(comptime T: type, ptr: [*]u8) *T {
return @ptrCast(@alignCast(ptr)); return @ptrCast(@alignCast(ptr));
} }
/// Initializes all the fields of the static TLS area and returns the computed /// Initializes all the fields of the static TLS area and returns the computed architecture-specific
/// architecture-specific value of the thread-pointer register /// value of the TP register.
/// pub fn prepareArea(area: []u8) usize {
/// This function is inline because thread local storage is not set up yet.
pub fn prepareTLS(area: []u8) usize {
@setRuntimeSafety(false); @setRuntimeSafety(false);
@disableInstrumentation(); @disableInstrumentation();
// Clear the area we're going to use, just to be safe
@memset(area, 0);
// Prepare the DTV
const dtv = alignPtrCast(DTV, area.ptr + tls_image.dtv_offset);
dtv.entries = 1;
dtv.tls_block[0] = area.ptr + tls_dtv_offset + tls_image.data_offset;
// Prepare the TCB
const tcb_ptr = alignPtrCast([*]u8, area.ptr + tls_image.tcb_offset);
tcb_ptr.* = switch (tls_variant) {
.VariantI => area.ptr + tls_image.dtv_offset,
.VariantII => area.ptr + tls_image.tcb_offset,
};
// Copy the data
@memcpy(area[tls_image.data_offset..][0..tls_image.init_data.len], tls_image.init_data);
// Return the corrected value (if needed) for the tp register. // Clear the area we're going to use, just to be safe.
// Overflow here is not a problem, the pointer arithmetic involving the tp @memset(area, 0);
// is done with wrapping semantics.
return @intFromPtr(area.ptr) +% tls_tp_offset +% // Prepare the ABI TCB.
if (tls_tp_points_past_tcb) tls_image.data_offset else tls_image.tcb_offset; const abi_tcb = alignPtrCast(AbiTcb, area.ptr + area_desc.abi_tcb.offset);
switch (current_variant) {
.I_original, .I_modified => abi_tcb.dtv = @intFromPtr(area.ptr + area_desc.dtv.offset),
.II => abi_tcb.self = abi_tcb,
}
// Prepare the DTV.
const dtv = alignPtrCast(Dtv, area.ptr + area_desc.dtv.offset);
dtv.len = 1;
dtv.tls_block = area.ptr + current_dtv_offset + area_desc.block.offset;
// Copy the initial data.
@memcpy(area[area_desc.block.offset..][0..area_desc.block.init.len], area_desc.block.init);
// Return the corrected value (if needed) for the TP register. Overflow here is not a problem;
// the pointer arithmetic involving the TP is done with wrapping semantics.
return @intFromPtr(area.ptr) +% switch (current_variant) {
.I_original, .II => area_desc.abi_tcb.offset,
.I_modified => area_desc.block.offset +% current_tp_offset,
};
} }
// The main motivation for the size chosen here is this is how much ends up being // The main motivation for the size chosen here is that this is how much ends up being requested for
// requested for the thread local variables of the std.crypto.random implementation. // the thread-local variables of the `std.crypto.random` implementation. I'm not sure why it ends up
// I'm not sure why it ends up being so much; the struct itself is only 64 bytes. // being so much; the struct itself is only 64 bytes. I think it has to do with being page-aligned
// I think it has to do with being page aligned and LLVM or LLD is not smart enough // and LLVM or LLD is not smart enough to lay out the TLS data in a space-conserving way. Anyway, I
// to lay out the TLS data in a space conserving way. Anyway I think it's fine // think it's fine because it's less than 3 pages of memory, and putting it in the ELF like this is
// because it's less than 3 pages of memory, and putting it in the ELF like this // equivalent to moving the `mmap` call below into the kernel, avoiding syscall overhead.
// is equivalent to moving the mmap call below into the kernel, avoiding syscall var main_thread_area_buffer: [0x2100]u8 align(mem.page_size) = undefined;
// overhead.
var main_thread_tls_buffer: [0x2100]u8 align(mem.page_size) = undefined;
pub fn initStaticTLS(phdrs: []elf.Phdr) void { /// Computes the layout of the static TLS area, allocates the area, initializes all of its fields,
/// and assigns the architecture-specific value to the TP register.
pub fn initStatic(phdrs: []elf.Phdr) void {
@setRuntimeSafety(false); @setRuntimeSafety(false);
@disableInstrumentation(); @disableInstrumentation();
initTLS(phdrs); computeAreaDesc(phdrs);
const tls_area = blk: { const area = blk: {
// Fast path for the common case where the TLS data is really small, // Fast path for the common case where the TLS data is really small, avoid an allocation and
// avoid an allocation and use our local buffer. // use our local buffer.
if (tls_image.alloc_align <= mem.page_size and if (area_desc.alignment <= mem.page_size and area_desc.size <= main_thread_area_buffer.len) {
tls_image.alloc_size <= main_thread_tls_buffer.len) break :blk main_thread_area_buffer[0..area_desc.size];
{
break :blk main_thread_tls_buffer[0..tls_image.alloc_size];
} }
const begin_addr = mmap( const begin_addr = mmap(
null, null,
tls_image.alloc_size + tls_image.alloc_align - 1, area_desc.size + area_desc.alignment - 1,
posix.PROT.READ | posix.PROT.WRITE, posix.PROT.READ | posix.PROT.WRITE,
.{ .TYPE = .PRIVATE, .ANONYMOUS = true }, .{ .TYPE = .PRIVATE, .ANONYMOUS = true },
-1, -1,
0, 0,
); );
if (@as(isize, @bitCast(begin_addr)) < 0) @trap(); if (@as(isize, @bitCast(begin_addr)) < 0) @trap();
const alloc_tls_area: [*]align(mem.page_size) u8 = @ptrFromInt(begin_addr);
const area_ptr: [*]align(mem.page_size) u8 = @ptrFromInt(begin_addr);
// Make sure the slice is correctly aligned. // Make sure the slice is correctly aligned.
const begin_aligned_addr = alignForward(begin_addr, tls_image.alloc_align); const begin_aligned_addr = alignForward(begin_addr, area_desc.alignment);
const start = begin_aligned_addr - begin_addr; const start = begin_aligned_addr - begin_addr;
break :blk alloc_tls_area[start..][0..tls_image.alloc_size]; break :blk area_ptr[start..][0..area_desc.size];
}; };
const tp_value = prepareTLS(tls_area); const tp_value = prepareArea(area);
setThreadPointer(tp_value); setThreadPointer(tp_value);
} }

View File

@ -469,7 +469,7 @@ fn posixCallMainAndExit(argc_argv_ptr: [*]usize) callconv(.C) noreturn {
} }
// Initialize the TLS area. // Initialize the TLS area.
std.os.linux.tls.initStaticTLS(phdrs); std.os.linux.tls.initStatic(phdrs);
} }
// The way Linux executables represent stack size is via the PT_GNU_STACK // The way Linux executables represent stack size is via the PT_GNU_STACK