Merge pull request #20857 from alexrp/tls-porting

`std.os.linux.tls`: Refactor, improve documentation, fix a bug, and port to more architectures
2025-12-06 14:23:09 +00:00 · 2024-08-01 01:15:17 -07:00 · 2024-08-01 01:15:17 -07:00 · 91163b44dd
commit 91163b44dd
parent 16dde6d260 36332a4fdc
3 changed files with 340 additions and 193 deletions
--- a/lib/std/Thread.zig
+++ b/lib/std/Thread.zig
@ -1261,9 +1261,9 @@ const LinuxThreadImpl = struct {
            bytes = std.mem.alignForward(usize, bytes, page_size);
            stack_offset = bytes;
-            bytes = std.mem.alignForward(usize, bytes, linux.tls.tls_image.alloc_align);
+            bytes = std.mem.alignForward(usize, bytes, linux.tls.area_desc.alignment);
            tls_offset = bytes;
-            bytes += linux.tls.tls_image.alloc_size;
+            bytes += linux.tls.area_desc.size;
            bytes = std.mem.alignForward(usize, bytes, @alignOf(Instance));
            instance_offset = bytes;
@ -1304,12 +1304,12 @@ const LinuxThreadImpl = struct {
        };
        // Prepare the TLS segment and prepare a user_desc struct when needed on x86
-        var tls_ptr = linux.tls.prepareTLS(mapped[tls_offset..]);
+        var tls_ptr = linux.tls.prepareArea(mapped[tls_offset..]);
        var user_desc: if (target.cpu.arch == .x86) linux.user_desc else void = undefined;
        if (target.cpu.arch == .x86) {
            defer tls_ptr = @intFromPtr(&user_desc);
            user_desc = .{
-                .entry_number = linux.tls.tls_image.gdt_entry_number,
+                .entry_number = linux.tls.area_desc.gdt_entry_number,
                .base_addr = tls_ptr,
                .limit = 0xfffff,
                .flags = .{
--- a/lib/std/os/linux/tls.zig
+++ b/lib/std/os/linux/tls.zig
@ -1,3 +1,14 @@
 //! This file implements the two TLS variants [1] used by ELF-based systems. Note that, in reality,
 //! Variant I has two sub-variants.
 //!
 //! It is important to understand that the term TCB (Thread Control Block) is overloaded here.
 //! Official ABI documentation uses it simply to mean the ABI TCB, i.e. a small area of ABI-defined
 //! data, usually one or two words (see the `AbiTcb` type below). People will also often use TCB to
 //! refer to the libc TCB, which can be any size and contain anything. (One could even omit it!) We
 //! refer to the latter as the Zig TCB; see the `ZigTcb` type below.
 //!
 //! [1] https://www.akkadia.org/drepper/tls.pdf
 const std = @import("std");
 const mem = std.mem;
 const elf = std.elf;
@ -7,56 +18,63 @@ const native_arch = @import("builtin").cpu.arch;
 const linux = std.os.linux;
 const posix = std.posix;
-// This file implements the two TLS variants [1] used by ELF-based systems.
+/// Represents an ELF TLS variant.
-//
+///
-// The variant I has the following layout in memory:
+/// In all variants, the TP and the TLS blocks must be aligned to the `p_align` value in the
-// -------------------------------------------------------
+/// `PT_TLS` ELF program header. Everything else has natural alignment.
-// |   DTV   |     Zig     |   DTV   | Alignment |  TLS  |
+///
-// | storage | thread data | pointer |           | block |
+/// The location of the DTV does not actually matter. For simplicity, we put it in the TLS area, but
-// ------------------------^------------------------------
+/// there is no actual ABI requirement that it reside there.
-//                         `-- The thread pointer register points here
+const Variant = enum {
-//
+    /// The original Variant I:
-// In this case we allocate additional space for our control structure that's
+    ///
-// placed _before_ the DTV pointer together with the DTV.
+    /// ----------------------------------------
-//
+    /// | DTV | Zig TCB | ABI TCB | TLS Blocks |
-// NOTE: Some systems such as power64 or mips use this variant with a twist: the
+    /// ----------------^-----------------------
-// alignment is not present and the tp and DTV addresses are offset by a
+    ///                 `-- The TP register points here.
-// constant.
+    ///
-//
+    /// The layout in this variant necessitates separate alignment of both the TP and the TLS
-// On the other hand the variant II has the following layout in memory:
+    /// blocks.
-// ---------------------------------------
+    ///
-// |  TLS  | TCB |     Zig     |   DTV   |
+    /// The first word in the ABI TCB points to the DTV. For some architectures, there may be a
-// | block |     | thread data | storage |
+    /// second word with an unspecified meaning.
-// --------^------------------------------
+    I_original,
-//         `-- The thread pointer register points here
+    /// The modified Variant I:
-//
+    ///
-// The structure of the TCB is not defined by the ABI so we reserve enough space
+    /// ---------------------------------------------------
-// for a single pointer as some architectures such as x86 and x86_64 need a
+    /// | DTV | Zig TCB | ABI TCB | [Offset] | TLS Blocks |
-// pointer to the TCB block itself at the address pointed by the tp.
+    /// -------------------------------------^-------------
-//
+    ///                                      `-- The TP register points here.
-// In this case the control structure and DTV are placed one after another right
+    ///
-// after the TLS block data.
+    /// The offset (which can be zero) is applied to the TP only; there is never a physical gap
-//
+    /// between the ABI TCB and the TLS blocks. This implies that we only need to align the TP.
-// At the moment the DTV is very simple since we only support static TLS, all we
+    ///
-// need is a two word vector to hold the number of entries (1) and the address
+    /// The first (and only) word in the ABI TCB points to the DTV.
-// of the first TLS block.
+    I_modified,
-//
+    /// Variant II:
-// [1] https://www.akkadia.org/drepper/tls.pdf
+    ///
-
+    /// ----------------------------------------
-const TLSVariant = enum {
+    /// | TLS Blocks | ABI TCB | Zig TCB | DTV |
-    VariantI,
+    /// -------------^--------------------------
-    VariantII,
+    ///              `-- The TP register points here.
    ///
    /// The first (and only) word in the ABI TCB points to the ABI TCB itself.
    II,
 };
-const tls_variant = switch (native_arch) {
+const current_variant: Variant = switch (native_arch) {
    .arc,
    .arm,
    .armeb,
    .thumb,
    .thumbeb,
    .aarch64,
    .aarch64_be,
-    .riscv32,
+    .csky,
-    .riscv64,
+    .thumb,
    .thumbeb,
    => .I_original,
    .loongarch32,
    .loongarch64,
    .m68k,
    .mips,
    .mipsel,
    .mips64,
@ -65,73 +83,130 @@ const tls_variant = switch (native_arch) {
    .powerpcle,
    .powerpc64,
    .powerpc64le,
-    => TLSVariant.VariantI,
+    .riscv32,
-    .x86_64, .x86, .sparc64 => TLSVariant.VariantII,
+    .riscv64,
-    else => @compileError("undefined tls_variant for this architecture"),
+    => .I_modified,
    .hexagon,
    .s390x,
    .sparc64,
    .x86,
    .x86_64,
    => .II,
    else => @compileError("undefined TLS variant for this architecture"),
 };
-// Controls how many bytes are reserved for the Thread Control Block
+/// The Offset value for the modified Variant I.
-const tls_tcb_size = switch (native_arch) {
+const current_tp_offset = switch (native_arch) {
-    // ARM EABI mandates enough space for two pointers: the first one points to
+    .m68k,
-    // the DTV while the second one is unspecified but reserved
+    .mips,
-    .arm, .armeb, .thumb, .thumbeb, .aarch64, .aarch64_be => 2 * @sizeOf(usize),
+    .mipsel,
-    // One pointer-sized word that points either to the DTV or the TCB itself
+    .mips64,
-    else => @sizeOf(usize),
+    .mips64el,
-};
+    .powerpc,
-
+    .powerpcle,
-// Controls if the TP points to the end of the TCB instead of its beginning
+    .powerpc64,
-const tls_tp_points_past_tcb = switch (native_arch) {
+    .powerpc64le,
-    .riscv32, .riscv64, .mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpcle, .powerpc64, .powerpc64le => true,
+    => 0x7000,
    else => false,
 };
 // Some architectures add some offset to the tp and dtv addresses in order to
 // make the generated code more efficient
 const tls_tp_offset = switch (native_arch) {
    .mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpcle, .powerpc64, .powerpc64le => 0x7000,
    else => 0,
 };
-const tls_dtv_offset = switch (native_arch) {
+/// Usually only used by the modified Variant I.
-    .mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpcle, .powerpc64, .powerpc64le => 0x8000,
+const current_dtv_offset = switch (native_arch) {
-    .riscv32, .riscv64 => 0x800,
+    .m68k,
    .mips,
    .mipsel,
    .mips64,
    .mips64el,
    .powerpc,
    .powerpcle,
    .powerpc64,
    .powerpc64le,
    => 0x8000,
    .riscv32,
    .riscv64,
    => 0x800,
    else => 0,
 };
-// Per-thread storage for Zig's use
+/// Per-thread storage for the ELF TLS ABI.
-const CustomData = struct {
+const AbiTcb = switch (current_variant) {
    .I_original, .I_modified => switch (native_arch) {
        // ARM EABI mandates enough space for two pointers: the first one points to the DTV as
        // usual, while the second one is unspecified.
        .aarch64,
        .aarch64_be,
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => extern struct {
            /// This is offset by `current_dtv_offset`.
            dtv: usize,
            reserved: ?*anyopaque,
        },
        else => extern struct {
            /// This is offset by `current_dtv_offset`.
            dtv: usize,
        },
    },
    .II => extern struct {
        /// This is self-referential.
        self: *AbiTcb,
    },
 };
 /// Per-thread storage for Zig's use. Currently unused.
 const ZigTcb = struct {
    dummy: usize,
 };
-// Dynamic Thread Vector
+/// Dynamic Thread Vector as specified in the ELF TLS ABI. Ordinarily, there is a block pointer per
-const DTV = extern struct {
+/// dynamically-loaded module, but since we only support static TLS, we only need one block pointer.
-    entries: usize,
+const Dtv = extern struct {
-    tls_block: [1][*]u8,
+    len: usize = 1,
    tls_block: [*]u8,
 };
-// Holds all the information about the process TLS image
+/// Describes a process's TLS area. The area encompasses the DTV, both TCBs, and the TLS block, with
-const TLSImage = struct {
+/// the exact layout of these being dependent primarily on `current_variant`.
-    init_data: []const u8,
+const AreaDesc = struct {
-    alloc_size: usize,
+    size: usize,
-    alloc_align: usize,
+    alignment: usize,
-    tcb_offset: usize,
+
-    dtv_offset: usize,
+    dtv: struct {
-    data_offset: usize,
+        /// Offset into the TLS area.
-    data_size: usize,
+        offset: usize,
-    // Only used on the x86 architecture
+    },
    abi_tcb: struct {
        /// Offset into the TLS area.
        offset: usize,
    },
    block: struct {
        /// The initial data to be copied into the TLS block. Note that this may be smaller than
        /// `size`, in which case the remainder of the TLS block is left zeroed (the area is cleared first).
        init: []const u8,
        /// Offset into the TLS area.
        offset: usize,
        /// This is the effective size of the TLS block, which may be greater than `init.len`.
        size: usize,
    },
    /// Only used on the 32-bit x86 architecture (not x86_64, nor x32).
    gdt_entry_number: usize,
 };
-pub var tls_image: TLSImage = undefined;
+pub var area_desc: AreaDesc = undefined;
 pub fn setThreadPointer(addr: usize) void {
    @setRuntimeSafety(false);
    @disableInstrumentation();
    switch (native_arch) {
        .x86 => {
            var user_desc: linux.user_desc = .{
-                .entry_number = tls_image.gdt_entry_number,
+                .entry_number = area_desc.gdt_entry_number,
                .base_addr = addr,
                .limit = 0xfffff,
                .flags = .{
@ -148,7 +223,7 @@ pub fn setThreadPointer(addr: usize) void {
            const gdt_entry_number = user_desc.entry_number;
            // We have to keep track of our slot as it's also needed for clone()
-            tls_image.gdt_entry_number = gdt_entry_number;
+            area_desc.gdt_entry_number = gdt_entry_number;
            // Update the %gs selector
            asm volatile ("movl %[gs_val], %%gs"
                :
@ -166,10 +241,38 @@ pub fn setThreadPointer(addr: usize) void {
                : [addr] "r" (addr),
            );
        },
        .arc => {
            // We apparently need to both set r25 (TP) *and* inform the kernel...
            asm volatile (
                \\ mov r25, %[addr]
                :
                : [addr] "r" (addr),
            );
            const rc = @call(.always_inline, linux.syscall1, .{ .arc_settls, addr });
            assert(rc == 0);
        },
        .arm, .armeb, .thumb, .thumbeb => {
            const rc = @call(.always_inline, linux.syscall1, .{ .set_tls, addr });
            assert(rc == 0);
        },
        .m68k => {
            const rc = linux.syscall1(.set_thread_area, addr);
            assert(rc == 0);
        },
        .hexagon => {
            asm volatile (
                \\ ugp = %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .loongarch32, .loongarch64 => {
            asm volatile (
                \\ mv tp, %[addr]
                :
                : [addr] "r" (addr),
            );
        },
        .riscv32, .riscv64 => {
            asm volatile (
                \\ mv tp, %[addr]
@ -177,7 +280,7 @@ pub fn setThreadPointer(addr: usize) void {
                : [addr] "r" (addr),
            );
        },
-        .mips, .mipsel, .mips64, .mips64el => {
+        .csky, .mips, .mipsel, .mips64, .mips64el => {
            const rc = @call(.always_inline, linux.syscall1, .{ .set_thread_area, addr });
            assert(rc == 0);
        },
@ -195,6 +298,17 @@ pub fn setThreadPointer(addr: usize) void {
                : [addr] "r" (addr),
            );
        },
        .s390x => {
            asm volatile (
                \\ lgr %%r0, %[addr]
                \\ sar %%a1, %%r0
                \\ srlg %%r0, %%r0, 32
                \\ sar %%a0, %%r0
                :
                : [addr] "r" (addr),
                : "r0"
            );
        },
        .sparc64 => {
            asm volatile (
                \\ mov %[addr], %%g7
@ -206,7 +320,7 @@ pub fn setThreadPointer(addr: usize) void {
    }
 }
-fn initTLS(phdrs: []elf.Phdr) void {
+fn computeAreaDesc(phdrs: []elf.Phdr) void {
    @setRuntimeSafety(false);
    @disableInstrumentation();
@ -221,72 +335,103 @@ fn initTLS(phdrs: []elf.Phdr) void {
        }
    }
-    var tls_align_factor: usize = undefined;
+    var align_factor: usize = undefined;
-    var tls_data: []const u8 = undefined;
+    var block_init: []const u8 = undefined;
-    var tls_data_alloc_size: usize = undefined;
+    var block_size: usize = undefined;
    if (tls_phdr) |phdr| {
-        // The effective size in memory is represented by p_memsz, the length of
+        align_factor = phdr.p_align;
-        // the data stored in the PT_TLS segment is p_filesz and may be less
+
-        // than the former
+        // The effective size in memory is represented by `p_memsz`; the length of the data stored
-        tls_align_factor = phdr.p_align;
+        // in the `PT_TLS` segment is `p_filesz` and may be less than the former.
-        tls_data = @as([*]u8, @ptrFromInt(img_base + phdr.p_vaddr))[0..phdr.p_filesz];
+        block_init = @as([*]u8, @ptrFromInt(img_base + phdr.p_vaddr))[0..phdr.p_filesz];
-        tls_data_alloc_size = phdr.p_memsz;
+        block_size = phdr.p_memsz;
    } else {
-        tls_align_factor = @alignOf(usize);
+        align_factor = @alignOf(usize);
-        tls_data = &[_]u8{};
+
-        tls_data_alloc_size = 0;
+        block_init = &[_]u8{};
        block_size = 0;
    }
-    // Offsets into the allocated TLS area
+    // Offsets into the allocated TLS area.
    var tcb_offset: usize = undefined;
    var dtv_offset: usize = undefined;
-    var data_offset: usize = undefined;
+    var abi_tcb_offset: usize = undefined;
-    // Compute the total size of the ABI-specific data plus our own control
+    var block_offset: usize = undefined;
-    // structures. All the offset calculated here assume a well-aligned base
+
-    // address.
+    // Compute the total size of the ABI-specific data plus our own `ZigTcb` structure. All the
-    const alloc_size = switch (tls_variant) {
+    // offsets calculated here assume a well-aligned base address.
-        .VariantI => blk: {
+    const area_size = switch (current_variant) {
        .I_original => blk: {
            var l: usize = 0;
            dtv_offset = l;
-            l += @sizeOf(DTV);
+            l += @sizeOf(Dtv);
-            // Add some padding here so that the thread pointer (tcb_offset) is
+            // Add some padding here so that the TP (`abi_tcb_offset`) is aligned to `align_factor`
-            // aligned to p_align and the CustomData structure can be found by
+            // and the `ZigTcb` structure can be found by simply subtracting `@sizeOf(ZigTcb)` from
-            // simply subtracting its @sizeOf from the tp value
+            // the TP.
-            const delta = (l + @sizeOf(CustomData)) & (tls_align_factor - 1);
+            const delta = (l + @sizeOf(ZigTcb)) & (align_factor - 1);
            if (delta > 0)
-                l += tls_align_factor - delta;
+                l += align_factor - delta;
-            l += @sizeOf(CustomData);
+            l += @sizeOf(ZigTcb);
-            tcb_offset = l;
+            abi_tcb_offset = l;
-            l += alignForward(tls_tcb_size, tls_align_factor);
+            l += alignForward(@sizeOf(AbiTcb), align_factor);
-            data_offset = l;
+            block_offset = l;
-            l += tls_data_alloc_size;
+            l += block_size;
            break :blk l;
        },
-        .VariantII => blk: {
+        .I_modified => blk: {
            var l: usize = 0;
            data_offset = l;
            l += alignForward(tls_data_alloc_size, tls_align_factor);
            // The thread pointer is aligned to p_align
            tcb_offset = l;
            l += tls_tcb_size;
            // The CustomData structure is right after the TCB with no padding
            // in between so it can be easily found
            l += @sizeOf(CustomData);
            l = alignForward(l, @alignOf(DTV));
            dtv_offset = l;
-            l += @sizeOf(DTV);
+            l += @sizeOf(Dtv);
            // In this variant, the TLS blocks must begin immediately after the end of the ABI TCB,
            // with the TP pointing to the beginning of the TLS blocks. Add padding so that the TP
            // (`abi_tcb_offset`) is aligned to `align_factor` and the `ZigTcb` structure can be
            // found by subtracting `@sizeOf(AbiTcb) + @sizeOf(ZigTcb)` from the TP.
            const delta = (l + @sizeOf(ZigTcb) + @sizeOf(AbiTcb)) & (align_factor - 1);
            if (delta > 0)
                l += align_factor - delta;
            l += @sizeOf(ZigTcb);
            abi_tcb_offset = l;
            l += @sizeOf(AbiTcb);
            block_offset = l;
            l += block_size;
            break :blk l;
        },
        .II => blk: {
            var l: usize = 0;
            block_offset = l;
            l += alignForward(block_size, align_factor);
            // The TP is aligned to `align_factor`.
            abi_tcb_offset = l;
            l += @sizeOf(AbiTcb);
            // The `ZigTcb` structure is right after the `AbiTcb` with no padding in between so it
            // can be easily found.
            l += @sizeOf(ZigTcb);
            // It doesn't really matter where we put the DTV, so give it natural alignment.
            l = alignForward(l, @alignOf(Dtv));
            dtv_offset = l;
            l += @sizeOf(Dtv);
            break :blk l;
        },
    };
-    tls_image = TLSImage{
+    area_desc = .{
-        .init_data = tls_data,
+        .size = area_size,
-        .alloc_size = alloc_size,
+        .alignment = align_factor,
-        .alloc_align = tls_align_factor,
+
-        .tcb_offset = tcb_offset,
+        .dtv = .{
-        .dtv_offset = dtv_offset,
+            .offset = dtv_offset,
-        .data_offset = data_offset,
+        },
-        .data_size = tls_data_alloc_size,
+
        .abi_tcb = .{
            .offset = abi_tcb_offset,
        },
        .block = .{
            .init = block_init,
            .offset = block_offset,
            .size = block_size,
        },
        .gdt_entry_number = @as(usize, @bitCast(@as(isize, -1))),
    };
 }
@ -306,78 +451,80 @@ inline fn alignPtrCast(comptime T: type, ptr: [*]u8) *T {
    return @ptrCast(@alignCast(ptr));
 }
-/// Initializes all the fields of the static TLS area and returns the computed
+/// Initializes all the fields of the static TLS area and returns the computed architecture-specific
-/// architecture-specific value of the thread-pointer register
+/// value of the TP register.
-///
+pub fn prepareArea(area: []u8) usize {
 /// This function is inline because thread local storage is not set up yet.
 pub fn prepareTLS(area: []u8) usize {
    @setRuntimeSafety(false);
    @disableInstrumentation();
    // Clear the area we're going to use, just to be safe
    @memset(area, 0);
    // Prepare the DTV
    const dtv = alignPtrCast(DTV, area.ptr + tls_image.dtv_offset);
    dtv.entries = 1;
    dtv.tls_block[0] = area.ptr + tls_dtv_offset + tls_image.data_offset;
    // Prepare the TCB
    const tcb_ptr = alignPtrCast([*]u8, area.ptr + tls_image.tcb_offset);
    tcb_ptr.* = switch (tls_variant) {
        .VariantI => area.ptr + tls_image.dtv_offset,
        .VariantII => area.ptr + tls_image.tcb_offset,
    };
    // Copy the data
    @memcpy(area[tls_image.data_offset..][0..tls_image.init_data.len], tls_image.init_data);
-    // Return the corrected value (if needed) for the tp register.
+    // Clear the area we're going to use, just to be safe.
-    // Overflow here is not a problem, the pointer arithmetic involving the tp
+    @memset(area, 0);
-    // is done with wrapping semantics.
+
-    return @intFromPtr(area.ptr) +% tls_tp_offset +%
+    // Prepare the ABI TCB.
-        if (tls_tp_points_past_tcb) tls_image.data_offset else tls_image.tcb_offset;
+    const abi_tcb = alignPtrCast(AbiTcb, area.ptr + area_desc.abi_tcb.offset);
    switch (current_variant) {
        .I_original, .I_modified => abi_tcb.dtv = @intFromPtr(area.ptr + area_desc.dtv.offset),
        .II => abi_tcb.self = abi_tcb,
    }
    // Prepare the DTV.
    const dtv = alignPtrCast(Dtv, area.ptr + area_desc.dtv.offset);
    dtv.len = 1;
    dtv.tls_block = area.ptr + current_dtv_offset + area_desc.block.offset;
    // Copy the initial data.
    @memcpy(area[area_desc.block.offset..][0..area_desc.block.init.len], area_desc.block.init);
    // Return the corrected value (if needed) for the TP register. Overflow here is not a problem;
    // the pointer arithmetic involving the TP is done with wrapping semantics.
    return @intFromPtr(area.ptr) +% switch (current_variant) {
        .I_original, .II => area_desc.abi_tcb.offset,
        .I_modified => area_desc.block.offset +% current_tp_offset,
    };
 }
-// The main motivation for the size chosen here is this is how much ends up being
+// The main motivation for the size chosen here is that this is how much ends up being requested for
-// requested for the thread local variables of the std.crypto.random implementation.
+// the thread-local variables of the `std.crypto.random` implementation. I'm not sure why it ends up
-// I'm not sure why it ends up being so much; the struct itself is only 64 bytes.
+// being so much; the struct itself is only 64 bytes. I think it has to do with being page-aligned
-// I think it has to do with being page aligned and LLVM or LLD is not smart enough
+// and LLVM or LLD is not smart enough to lay out the TLS data in a space-conserving way. Anyway, I
-// to lay out the TLS data in a space conserving way. Anyway I think it's fine
+// think it's fine because it's less than 3 pages of memory, and putting it in the ELF like this is
-// because it's less than 3 pages of memory, and putting it in the ELF like this
+// equivalent to moving the `mmap` call below into the kernel, avoiding syscall overhead.
-// is equivalent to moving the mmap call below into the kernel, avoiding syscall
+var main_thread_area_buffer: [0x2100]u8 align(mem.page_size) = undefined;
 // overhead.
 var main_thread_tls_buffer: [0x2100]u8 align(mem.page_size) = undefined;
-pub fn initStaticTLS(phdrs: []elf.Phdr) void {
+/// Computes the layout of the static TLS area, allocates the area, initializes all of its fields,
 /// and assigns the architecture-specific value to the TP register.
 pub fn initStatic(phdrs: []elf.Phdr) void {
    @setRuntimeSafety(false);
    @disableInstrumentation();
-    initTLS(phdrs);
+    computeAreaDesc(phdrs);
-    const tls_area = blk: {
+    const area = blk: {
-        // Fast path for the common case where the TLS data is really small,
+        // Fast path for the common case where the TLS data is really small, avoid an allocation and
-        // avoid an allocation and use our local buffer.
+        // use our local buffer.
-        if (tls_image.alloc_align <= mem.page_size and
+        if (area_desc.alignment <= mem.page_size and area_desc.size <= main_thread_area_buffer.len) {
-            tls_image.alloc_size <= main_thread_tls_buffer.len)
+            break :blk main_thread_area_buffer[0..area_desc.size];
        {
            break :blk main_thread_tls_buffer[0..tls_image.alloc_size];
        }
        const begin_addr = mmap(
            null,
-            tls_image.alloc_size + tls_image.alloc_align - 1,
+            area_desc.size + area_desc.alignment - 1,
            posix.PROT.READ | posix.PROT.WRITE,
            .{ .TYPE = .PRIVATE, .ANONYMOUS = true },
            -1,
            0,
        );
        if (@as(isize, @bitCast(begin_addr)) < 0) @trap();
-        const alloc_tls_area: [*]align(mem.page_size) u8 = @ptrFromInt(begin_addr);
+
        const area_ptr: [*]align(mem.page_size) u8 = @ptrFromInt(begin_addr);
        // Make sure the slice is correctly aligned.
-        const begin_aligned_addr = alignForward(begin_addr, tls_image.alloc_align);
+        const begin_aligned_addr = alignForward(begin_addr, area_desc.alignment);
        const start = begin_aligned_addr - begin_addr;
-        break :blk alloc_tls_area[start..][0..tls_image.alloc_size];
+        break :blk area_ptr[start..][0..area_desc.size];
    };
-    const tp_value = prepareTLS(tls_area);
+    const tp_value = prepareArea(area);
    setThreadPointer(tp_value);
 }
--- a/lib/std/start.zig
+++ b/lib/std/start.zig
@ -469,7 +469,7 @@ fn posixCallMainAndExit(argc_argv_ptr: [*]usize) callconv(.C) noreturn {
            }
            // Initialize the TLS area.
-            std.os.linux.tls.initStaticTLS(phdrs);
+            std.os.linux.tls.initStatic(phdrs);
        }
        // The way Linux executables represent stack size is via the PT_GNU_STACK