Merge pull request #9168 from LemonBoy/fix-pie

std: Fix PIE startup sequence
This commit is contained in:
Andrew Kelley 2021-06-20 20:01:39 -04:00 committed by GitHub
commit fc1feebdc0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 106 additions and 115 deletions

View File

@ -34,6 +34,7 @@ pub usingnamespace switch (native_arch) {
};
pub usingnamespace @import("bits.zig");
pub const tls = @import("linux/tls.zig");
pub const pie = @import("linux/start_pie.zig");
pub const BPF = @import("linux/bpf.zig");
pub usingnamespace @import("linux/io_uring.zig");

View File

@ -8,33 +8,35 @@ const R_386_RELATIVE = 8;
const R_ARM_RELATIVE = 23;
const R_AARCH64_RELATIVE = 1027;
const R_RISCV_RELATIVE = 3;
const R_SPARC_RELATIVE = 22;
const ARCH_RELATIVE_RELOC = switch (builtin.cpu.arch) {
const R_RELATIVE = switch (builtin.cpu.arch) {
.i386 => R_386_RELATIVE,
.x86_64 => R_AMD64_RELATIVE,
.arm => R_ARM_RELATIVE,
.aarch64 => R_AARCH64_RELATIVE,
.riscv64 => R_RISCV_RELATIVE,
else => @compileError("unsupported architecture"),
else => @compileError("Missing R_RELATIVE definition for this target"),
};
// Just a convoluted (but necessary) way to obtain the address of the _DYNAMIC[]
// vector as PC-relative so that we can use it before any relocation is applied
// Obtain a pointer to the _DYNAMIC array.
// We have to compute its address as a PC-relative quantity not to require a
// relocation that, at this point, is not yet applied.
fn getDynamicSymbol() [*]elf.Dyn {
const addr = switch (builtin.cpu.arch) {
return switch (builtin.cpu.arch) {
.i386 => asm volatile (
\\ .weak _DYNAMIC
\\ .hidden _DYNAMIC
\\ call 1f
\\ 1: pop %[ret]
\\ lea _DYNAMIC-1b(%[ret]), %[ret]
: [ret] "=r" (-> usize)
: [ret] "=r" (-> [*]elf.Dyn)
),
.x86_64 => asm volatile (
\\ .weak _DYNAMIC
\\ .hidden _DYNAMIC
\\ lea _DYNAMIC(%%rip), %[ret]
: [ret] "=r" (-> usize)
: [ret] "=r" (-> [*]elf.Dyn)
),
// Work around the limited offset range of `ldr`
.arm => asm volatile (
@ -45,7 +47,7 @@ fn getDynamicSymbol() [*]elf.Dyn {
\\ b 2f
\\ 1: .word _DYNAMIC-1b
\\ 2:
: [ret] "=r" (-> usize)
: [ret] "=r" (-> [*]elf.Dyn)
),
// A simple `adr` is not enough as it has a limited offset range
.aarch64 => asm volatile (
@ -53,61 +55,39 @@ fn getDynamicSymbol() [*]elf.Dyn {
\\ .hidden _DYNAMIC
\\ adrp %[ret], _DYNAMIC
\\ add %[ret], %[ret], #:lo12:_DYNAMIC
: [ret] "=r" (-> usize)
: [ret] "=r" (-> [*]elf.Dyn)
),
.riscv64 => asm volatile (
\\ .weak _DYNAMIC
\\ .hidden _DYNAMIC
\\ lla %[ret], _DYNAMIC
: [ret] "=r" (-> usize)
: [ret] "=r" (-> [*]elf.Dyn)
),
else => @compileError("???"),
else => {
@compileError("PIE startup is not yet supported for this target!");
},
};
return @intToPtr([*]elf.Dyn, addr);
}
pub fn apply_relocations() void {
pub fn relocate(phdrs: []elf.Phdr) void {
@setRuntimeSafety(false);
const dynv = getDynamicSymbol();
const auxv = std.os.linux.elf_aux_maybe.?;
var at_phent: usize = undefined;
var at_phnum: usize = undefined;
var at_phdr: usize = undefined;
var at_hwcap: usize = undefined;
{
var i: usize = 0;
while (auxv[i].a_type != std.elf.AT_NULL) : (i += 1) {
switch (auxv[i].a_type) {
elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
else => continue,
}
}
}
// Sanity check
assert(at_phent == @sizeOf(elf.Phdr));
// Search the TLS section
const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
const base_addr = blk: {
// Recover the delta applied by the loader by comparing the effective and
// the theoretical load addresses for the `_DYNAMIC` symbol.
const base_addr = base: {
for (phdrs) |*phdr| {
if (phdr.p_type == elf.PT_DYNAMIC) {
break :blk @ptrToInt(&dynv[0]) - phdr.p_vaddr;
}
if (phdr.p_type != elf.PT_DYNAMIC) continue;
break :base @ptrToInt(dynv) - phdr.p_vaddr;
}
unreachable;
// This is not supposed to happen for well-formed binaries.
std.os.abort();
};
var rel_addr: usize = 0;
var rela_addr: usize = 0;
var rel_size: usize = 0;
var rela_size: usize = 0;
{
var i: usize = 0;
while (dynv[i].d_tag != elf.DT_NULL) : (i += 1) {
@ -121,18 +101,18 @@ pub fn apply_relocations() void {
}
}
// Perform the relocations
// Apply the relocations.
if (rel_addr != 0) {
const rel = std.mem.bytesAsSlice(elf.Rel, @intToPtr([*]u8, rel_addr)[0..rel_size]);
for (rel) |r| {
if (r.r_type() != ARCH_RELATIVE_RELOC) continue;
if (r.r_type() != R_RELATIVE) continue;
@intToPtr(*usize, base_addr + r.r_offset).* += base_addr;
}
}
if (rela_addr != 0) {
const rela = std.mem.bytesAsSlice(elf.Rela, @intToPtr([*]u8, rela_addr)[0..rela_size]);
for (rela) |r| {
if (r.r_type() != ARCH_RELATIVE_RELOC) continue;
if (r.r_type() != R_RELATIVE) continue;
@intToPtr(*usize, base_addr + r.r_offset).* += base_addr + @bitCast(usize, r.r_addend);
}
}

View File

@ -190,53 +190,18 @@ pub fn setThreadPointer(addr: usize) void {
}
}
fn initTLS() void {
fn initTLS(phdrs: []elf.Phdr) void {
var tls_phdr: ?*elf.Phdr = null;
var img_base: usize = 0;
const auxv = std.os.linux.elf_aux_maybe.?;
var at_phent: usize = undefined;
var at_phnum: usize = undefined;
var at_phdr: usize = undefined;
var at_hwcap: usize = undefined;
var i: usize = 0;
while (auxv[i].a_type != std.elf.AT_NULL) : (i += 1) {
switch (auxv[i].a_type) {
elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
elf.AT_HWCAP => at_hwcap = auxv[i].a_un.a_val,
else => continue,
}
}
// Sanity check
assert(at_phent == @sizeOf(elf.Phdr));
// Find the TLS section
const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
for (phdrs) |*phdr| {
switch (phdr.p_type) {
elf.PT_PHDR => img_base = at_phdr - phdr.p_vaddr,
elf.PT_PHDR => img_base = @ptrToInt(phdrs.ptr) - phdr.p_vaddr,
elf.PT_TLS => tls_phdr = phdr,
else => {},
}
}
// ARMv6 targets (and earlier) have no support for TLS in hardware
// FIXME: Elide the check for targets >= ARMv7 when the target feature API
// becomes less verbose (and more usable).
if (comptime native_arch.isARM()) {
if (at_hwcap & std.os.linux.HWCAP_TLS == 0) {
// FIXME: Make __aeabi_read_tp call the kernel helper kuser_get_tls
// For the time being use a simple abort instead of a @panic call to
// keep the binary bloat under control.
std.os.abort();
}
}
var tls_align_factor: usize = undefined;
var tls_data: []const u8 = undefined;
var tls_data_alloc_size: usize = undefined;
@ -344,8 +309,8 @@ pub fn prepareTLS(area: []u8) usize {
// overhead.
var main_thread_tls_buffer: [0x2100]u8 align(mem.page_size) = undefined;
pub fn initStaticTLS() void {
initTLS();
pub fn initStaticTLS(phdrs: []elf.Phdr) void {
initTLS(phdrs);
const tls_area = blk: {
// Fast path for the common case where the TLS data is really small,

View File

@ -10,6 +10,7 @@ const std = @import("std.zig");
const builtin = @import("builtin");
const assert = std.debug.assert;
const uefi = std.os.uefi;
const elf = std.elf;
const tlcsprng = @import("crypto/tlcsprng.zig");
const native_arch = builtin.cpu.arch;
const native_os = builtin.os.tag;
@ -281,49 +282,60 @@ fn posixCallMainAndExit() noreturn {
if (native_os == .linux) {
// Find the beginning of the auxiliary vector
const auxv = @ptrCast([*]std.elf.Auxv, @alignCast(@alignOf(usize), envp.ptr + envp_count + 1));
const auxv = @ptrCast([*]elf.Auxv, @alignCast(@alignOf(usize), envp.ptr + envp_count + 1));
std.os.linux.elf_aux_maybe = auxv;
// Do this as early as possible, the aux vector is needed
var at_hwcap: usize = 0;
const phdrs = init: {
var i: usize = 0;
var at_phdr: usize = 0;
var at_phnum: usize = 0;
while (auxv[i].a_type != elf.AT_NULL) : (i += 1) {
switch (auxv[i].a_type) {
elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
elf.AT_HWCAP => at_hwcap = auxv[i].a_un.a_val,
else => continue,
}
}
break :init @intToPtr([*]elf.Phdr, at_phdr)[0..at_phnum];
};
// Apply the initial relocations as early as possible in the startup
// process.
if (builtin.position_independent_executable) {
@import("os/linux/start_pie.zig").apply_relocations();
std.os.linux.pie.relocate(phdrs);
}
// Initialize the TLS area. We do a runtime check here to make sure
// this code is truly being statically executed and not inside a dynamic
// loader, otherwise this would clobber the thread ID register.
const is_dynamic = @import("dynamic_library.zig").get_DYNAMIC() != null;
if (!is_dynamic) {
std.os.linux.tls.initStaticTLS();
// ARMv6 targets (and earlier) have no support for TLS in hardware.
// FIXME: Elide the check for targets >= ARMv7 when the target feature API
// becomes less verbose (and more usable).
if (comptime native_arch.isARM()) {
if (at_hwcap & std.os.linux.HWCAP_TLS == 0) {
// FIXME: Make __aeabi_read_tp call the kernel helper kuser_get_tls
// For the time being use a simple abort instead of a @panic call to
// keep the binary bloat under control.
std.os.abort();
}
}
// Initialize the TLS area.
std.os.linux.tls.initStaticTLS(phdrs);
// The way Linux executables represent stack size is via the PT_GNU_STACK
// program header. However the kernel does not recognize it; it always gives 8 MiB.
// Here we look for the stack size in our program headers and use setrlimit
// to ask for more stack space.
{
var i: usize = 0;
var at_phdr: usize = undefined;
var at_phnum: usize = undefined;
while (auxv[i].a_type != std.elf.AT_NULL) : (i += 1) {
switch (auxv[i].a_type) {
std.elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
std.elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
else => continue,
}
}
expandStackSize(at_phdr, at_phnum);
}
expandStackSize(phdrs);
}
std.os.exit(@call(.{ .modifier = .always_inline }, callMainWithArgs, .{ argc, argv, envp }));
}
fn expandStackSize(at_phdr: usize, at_phnum: usize) void {
const phdrs = (@intToPtr([*]std.elf.Phdr, at_phdr))[0..at_phnum];
fn expandStackSize(phdrs: []elf.Phdr) void {
for (phdrs) |*phdr| {
switch (phdr.p_type) {
std.elf.PT_GNU_STACK => {
elf.PT_GNU_STACK => {
const wanted_stack_size = phdr.p_memsz;
assert(wanted_stack_size % std.mem.page_size == 0);
@ -362,9 +374,10 @@ fn main(c_argc: i32, c_argv: [*][*:0]u8, c_envp: [*:null]?[*:0]u8) callconv(.C)
const envp = @ptrCast([*][*:0]u8, c_envp)[0..env_count];
if (builtin.os.tag == .linux) {
const at_phdr = std.c.getauxval(std.elf.AT_PHDR);
const at_phnum = std.c.getauxval(std.elf.AT_PHNUM);
expandStackSize(at_phdr, at_phnum);
const at_phdr = std.c.getauxval(elf.AT_PHDR);
const at_phnum = std.c.getauxval(elf.AT_PHNUM);
const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
expandStackSize(phdrs);
}
return @call(.{ .modifier = .always_inline }, callMainWithArgs, .{ @intCast(usize, c_argc), c_argv, envp });

View File

@ -14,7 +14,7 @@ pub fn addCases(ctx: *TestContext) !void {
{
var case = ctx.exe("hello world with updates", target);
case.addError("", &[_][]const u8{
":84:9: error: struct 'test_case.test_case' has no member named 'main'",
":85:9: error: struct 'test_case.test_case' has no member named 'main'",
});
// Incorrect return type

View File

@ -24,7 +24,7 @@ pub fn addCases(ctx: *TestContext) !void {
var case = ctx.exe("hello world with updates", linux_x64);
case.addError("", &[_][]const u8{
":84:9: error: struct 'test_case.test_case' has no member named 'main'",
":85:9: error: struct 'test_case.test_case' has no member named 'main'",
});
// Incorrect return type

View File

@ -33,6 +33,11 @@ pub fn addCases(cases: *tests.StandaloneContext) void {
}
cases.addBuildFile("test/standalone/c_compiler/build.zig", .{ .build_modes = true, .cross_targets = true });
// Try to build and run a PIE executable.
if (std.Target.current.os.tag == .linux) {
cases.addBuildFile("test/standalone/pie/build.zig", .{});
}
// Ensure the development tools are buildable.
cases.add("tools/gen_spirv_spec.zig");
cases.add("tools/gen_stubs.zig");

View File

@ -0,0 +1,12 @@
const Builder = @import("std").build.Builder;
/// Build script entry point: compiles `main.zig` as a test artifact with PIE
/// enabled and makes the resulting "test" step part of the default step.
pub fn build(b: *Builder) void {
    // The test artifact under scrutiny, built in the user-selected mode.
    const pie_test = b.addTest("main.zig");
    const mode = b.standardReleaseOptions();
    pie_test.setBuildMode(mode);
    // Request a position-independent executable.
    pie_test.pie = true;

    // Expose the artifact through a named step and hook it into the default one.
    const run_step = b.step("test", "Test the program");
    run_step.dependOn(&pie_test.step);
    b.default_step.dependOn(run_step);
}

View File

@ -0,0 +1,15 @@
const std = @import("std");
const elf = std.elf;
threadlocal var foo: u8 = 42;
test "Check ELF header" {
    // A PIE executable carries the ET_DYN type in its ELF header, whereas a
    // regular executable carries ET_EXEC.
    const base = std.process.getBaseAddress();
    const ehdr = @intToPtr(*elf.Ehdr, base);
    try std.testing.expectEqual(elf.ET.DYN, ehdr.e_type);
}
test "TLS is initialized" {
    // The threadlocal `foo` must hold its initializer value, which is only the
    // case when the startup code has set up the TLS area.
    const expected: u8 = 42;
    try std.testing.expectEqual(expected, foo);
}