stage2: implement @setAlignStack and 128-bit cmpxchg

* test runner is improved to respect `error.SkipZigTest` * start code is improved to `@setAlignStack(16)` before calling main() * the newly passing behavior test has a workaround for the fact that stage2 cannot yet call `std.Target.x86.featureSetHas()` at comptime. This is blocking on comptime closures. The workaround is that there is a new decl `@import("builtin").stage2_x86_cx16` which is a `bool`. * Implement `@setAlignStack`. This language feature should be re-evaluated at some point - I'll file an issue for it. * LLVM backend: apply/remove the cold attribute and noinline attribute where appropriate. * LLVM backend: loads and stores are properly annotated with alignment and volatile attributes. * LLVM backend: allocas are properly annotated with alignment. * Type: fix integers reporting wrong alignment for 256-bit integers and beyond. Once you get to 16 byte aligned, there is no further alignment for larger integers.
2025-12-06 06:13:07 +00:00 · 2021-09-16 21:03:55 -07:00 · 2021-09-16 21:03:55 -07:00 · dbe9a5114e
commit dbe9a5114e
parent dc9d76b630
12 changed files with 195 additions and 49 deletions
--- a/lib/std/special/test_runner.zig
+++ b/lib/std/special/test_runner.zig
@ -123,8 +123,16 @@ pub fn log(
 }

 pub fn main2() anyerror!void {
+    var bad = false;
    // Simpler main(), exercising fewer language features, so that stage2 can handle it.
    for (builtin.test_functions) |test_fn| {
-        try test_fn.func();
+        test_fn.func() catch |err| {
+            if (err != error.SkipZigTest) {
+                bad = true;
+            }
+        };
+    }
+    if (bad) {
+        return error.TestsFailed;
    }
 }
--- a/lib/std/start.zig
+++ b/lib/std/start.zig
@ -87,6 +87,11 @@ fn main2() callconv(.C) c_int {
 }

 fn _start2() callconv(.Naked) noreturn {
+    callMain2();
+}
+
+fn callMain2() noreturn {
+    @setAlignStack(16);
    root.main();
    exit2(0);
 }
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@ -3714,6 +3714,8 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: *Allocator) Alloc
    const target = comp.getTarget();
    const generic_arch_name = target.cpu.arch.genericName();
    const use_stage1 = build_options.is_stage1 and comp.bin_file.options.use_stage1;
+    const stage2_x86_cx16 = target.cpu.arch == .x86_64 and
+        std.Target.x86.featureSetHas(target.cpu.features, .cx16);

    @setEvalBranchQuota(4000);
    try buffer.writer().print(
@ -3725,6 +3727,8 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: *Allocator) Alloc
        \\pub const zig_is_stage2 = {};
        \\/// Temporary until self-hosted supports the `cpu.arch` value.
        \\pub const stage2_arch: std.Target.Cpu.Arch = .{};
+        \\/// Temporary until self-hosted can call `std.Target.x86.featureSetHas` at comptime.
+        \\pub const stage2_x86_cx16 = {};
        \\
        \\pub const output_mode = std.builtin.OutputMode.{};
        \\pub const link_mode = std.builtin.LinkMode.{};
@ -3740,6 +3744,7 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: *Allocator) Alloc
        build_options.version,
        !use_stage1,
        std.zig.fmtId(@tagName(target.cpu.arch)),
+        stage2_x86_cx16,
        std.zig.fmtId(@tagName(comp.bin_file.options.output_mode)),
        std.zig.fmtId(@tagName(comp.bin_file.options.link_mode)),
        comp.bin_file.options.is_test,
--- a/src/Module.zig
+++ b/src/Module.zig
@ -64,11 +64,16 @@ import_table: std.StringArrayHashMapUnmanaged(*Scope.File) = .{},
 /// The set of all the generic function instantiations. This is used so that when a generic
 /// function is called twice with the same comptime parameter arguments, both calls dispatch
 /// to the same function.
+/// TODO: remove functions from this set when they are destroyed.
 monomorphed_funcs: MonomorphedFuncsSet = .{},
-
 /// The set of all comptime function calls that have been cached so that future calls
 /// with the same parameters will get the same return value.
 memoized_calls: MemoizedCallSet = .{},
+/// Contains the values from `@setAlignStack`. A sparse table is used here
+/// instead of a field of `Fn` because usage of `@setAlignStack` is rare, while
+/// functions are many.
+/// TODO: remove functions from this set when they are destroyed.
+align_stack_fns: std.AutoHashMapUnmanaged(*const Fn, SetAlignStack) = .{},

 /// We optimize memory usage for a compilation with no compile errors by storing the
 /// error messages and mapping outside of `Decl`.
@ -215,6 +220,13 @@ pub const MemoizedCall = struct {
    }
 };

+pub const SetAlignStack = struct {
+    alignment: u32,
+    /// TODO: This needs to store a non-lazy source location for the case of an inline function
+    /// which does `@setAlignStack` (applying it to the caller).
+    src: LazySrcLoc,
+};
+
 /// A `Module` has zero or one of these depending on whether `-femit-h` is enabled.
 pub const GlobalEmitH = struct {
    /// Where to put the output.
@ -881,6 +893,7 @@ pub const Fn = struct {

    state: Analysis,
    is_cold: bool = false,
+    is_noinline: bool = false,

    pub const Analysis = enum {
        queued,
@ -2347,6 +2360,7 @@ pub fn deinit(mod: *Module) void {

    mod.error_name_list.deinit(gpa);
    mod.test_functions.deinit(gpa);
+    mod.align_stack_fns.deinit(gpa);
    mod.monomorphed_funcs.deinit(gpa);

    {
@ -3977,6 +3991,12 @@ fn markOutdatedDecl(mod: *Module, decl: *Decl) !void {
    if (mod.failed_decls.fetchSwapRemove(decl)) |kv| {
        kv.value.destroy(mod.gpa);
    }
+    if (decl.has_tv and decl.owns_tv) {
+        if (decl.val.castTag(.function)) |payload| {
+            const func = payload.data;
+            _ = mod.align_stack_fns.remove(func);
+        }
+    }
    if (mod.emit_h) |emit_h| {
        if (emit_h.failed_decls.fetchSwapRemove(decl)) |kv| {
            kv.value.destroy(mod.gpa);
--- a/src/Sema.zig
+++ b/src/Sema.zig
@ -833,6 +833,7 @@ fn failWithUseOfUndef(sema: *Sema, block: *Scope.Block, src: LazySrcLoc) Compile
 /// Appropriate to call when the coercion has already been done by result
 /// location semantics. Asserts the value fits in the provided `Int` type.
 /// Only supports `Int` types 64 bits or less.
+/// TODO don't ever call this since we're migrating towards ResultLoc.coerced_ty.
 fn resolveAlreadyCoercedInt(
    sema: *Sema,
    block: *Scope.Block,
@ -849,6 +850,23 @@ fn resolveAlreadyCoercedInt(
    }
 }

+fn resolveAlign(
+    sema: *Sema,
+    block: *Scope.Block,
+    src: LazySrcLoc,
+    zir_ref: Zir.Inst.Ref,
+) !u16 {
+    const alignment_big = try sema.resolveInt(block, src, zir_ref, Type.initTag(.u16));
+    const alignment = @intCast(u16, alignment_big); // We coerce to u16 in the prev line.
+    if (alignment == 0) return sema.mod.fail(&block.base, src, "alignment must be >= 1", .{});
+    if (!std.math.isPowerOfTwo(alignment)) {
+        return sema.mod.fail(&block.base, src, "alignment value {d} is not a power of two", .{
+            alignment,
+        });
+    }
+    return alignment;
+}
+
 fn resolveInt(
    sema: *Sema,
    block: *Scope.Block,
@ -2285,9 +2303,36 @@ fn zirExport(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileErro
 }

 fn zirSetAlignStack(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!void {
+    const mod = sema.mod;
    const inst_data = sema.code.instructions.items(.data)[inst].un_node;
+    const operand_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
    const src: LazySrcLoc = inst_data.src();
-    return sema.mod.fail(&block.base, src, "TODO: implement Sema.zirSetAlignStack", .{});
+    const alignment = try sema.resolveAlign(block, operand_src, inst_data.operand);
+    if (alignment > 256) {
+        return mod.fail(&block.base, src, "attempt to @setAlignStack({d}); maximum is 256", .{
+            alignment,
+        });
+    }
+    const func = sema.owner_func orelse
+        return mod.fail(&block.base, src, "@setAlignStack outside function body", .{});
+
+    switch (func.owner_decl.ty.fnCallingConvention()) {
+        .Naked => return mod.fail(&block.base, src, "@setAlignStack in naked function", .{}),
+        .Inline => return mod.fail(&block.base, src, "@setAlignStack in inline function", .{}),
+        else => {},
+    }
+
+    const gop = try mod.align_stack_fns.getOrPut(mod.gpa, func);
+    if (gop.found_existing) {
+        const msg = msg: {
+            const msg = try mod.errMsg(&block.base, src, "multiple @setAlignStack in the same function body", .{});
+            errdefer msg.destroy(mod.gpa);
+            try mod.errNote(&block.base, src, msg, "other instance here", .{});
+            break :msg msg;
+        };
+        return mod.failWithOwnedErrorMsg(&block.base, msg);
+    }
+    gop.value_ptr.* = .{ .alignment = alignment, .src = src };
 }

 fn zirSetCold(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!void {
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@ -355,6 +355,20 @@ pub const Object = struct {

        const llvm_func = try dg.resolveLlvmFunction(decl);

+        if (module.align_stack_fns.get(func)) |align_info| {
+            dg.addFnAttrInt(llvm_func, "alignstack", align_info.alignment);
+            dg.addFnAttr(llvm_func, "noinline");
+        } else {
+            DeclGen.removeFnAttr(llvm_func, "alignstack");
+            if (!func.is_noinline) DeclGen.removeFnAttr(llvm_func, "noinline");
+        }
+
+        if (func.is_cold) {
+            dg.addFnAttr(llvm_func, "cold");
+        } else {
+            DeclGen.removeFnAttr(llvm_func, "cold");
+        }
+
        // This gets the LLVM values from the function and stores them in `dg.args`.
        const fn_param_len = decl.ty.fnParamLen();
        var args = try dg.gpa.alloc(*const llvm.Value, fn_param_len);
@ -512,7 +526,9 @@ pub const DeclGen = struct {
        }
    }

-    /// If the llvm function does not exist, create it
+    /// If the llvm function does not exist, create it.
+    /// Note that this can be called before the function's semantic analysis has
+    /// completed, so if any attributes rely on that, they must be done in updateFunc, not here.
    fn resolveLlvmFunction(self: *DeclGen, decl: *Module.Decl) !*const llvm.Value {
        if (self.llvmModule().getNamedFunction(decl.name)) |llvm_fn| return llvm_fn;

@ -895,17 +911,39 @@ pub const DeclGen = struct {
        }
    }

-    // Helper functions
-    fn addAttr(self: *DeclGen, val: *const llvm.Value, index: llvm.AttributeIndex, name: []const u8) void {
+    fn addAttr(dg: *DeclGen, val: *const llvm.Value, index: llvm.AttributeIndex, name: []const u8) void {
+        return dg.addAttrInt(val, index, name, 0);
+    }
+
+    fn removeAttr(val: *const llvm.Value, index: llvm.AttributeIndex, name: []const u8) void {
        const kind_id = llvm.getEnumAttributeKindForName(name.ptr, name.len);
        assert(kind_id != 0);
-        const llvm_attr = self.context.createEnumAttribute(kind_id, 0);
+        val.removeEnumAttributeAtIndex(index, kind_id);
+    }
+
+    fn addAttrInt(
+        dg: *DeclGen,
+        val: *const llvm.Value,
+        index: llvm.AttributeIndex,
+        name: []const u8,
+        int: u64,
+    ) void {
+        const kind_id = llvm.getEnumAttributeKindForName(name.ptr, name.len);
+        assert(kind_id != 0);
+        const llvm_attr = dg.context.createEnumAttribute(kind_id, int);
        val.addAttributeAtIndex(index, llvm_attr);
    }

-    fn addFnAttr(self: *DeclGen, val: *const llvm.Value, attr_name: []const u8) void {
-        // TODO: improve this API, `addAttr(-1, attr_name)`
-        self.addAttr(val, std.math.maxInt(llvm.AttributeIndex), attr_name);
+    fn addFnAttr(dg: *DeclGen, val: *const llvm.Value, name: []const u8) void {
+        dg.addAttr(val, std.math.maxInt(llvm.AttributeIndex), name);
+    }
+
+    fn removeFnAttr(fn_val: *const llvm.Value, name: []const u8) void {
+        removeAttr(fn_val, std.math.maxInt(llvm.AttributeIndex), name);
+    }
+
+    fn addFnAttrInt(dg: *DeclGen, fn_val: *const llvm.Value, name: []const u8, int: u64) void {
+        return dg.addAttrInt(fn_val, std.math.maxInt(llvm.AttributeIndex), name, int);
    }

    /// If the operand type of an atomic operation is not byte sized we need to
@ -1975,12 +2013,13 @@ pub const FuncGen = struct {
            return null;
        // buildAlloca expects the pointee type, not the pointer type, so assert that
        // a Payload.PointerSimple is passed to the alloc instruction.
-        const inst_ty = self.air.typeOfIndex(inst);
-        const pointee_type = inst_ty.castPointer().?.data;
-
-        // TODO: figure out a way to get the name of the var decl.
-        // TODO: set alignment and volatile
-        return self.buildAlloca(try self.dg.llvmType(pointee_type));
+        const ptr_ty = self.air.typeOfIndex(inst);
+        const pointee_type = ptr_ty.elemType();
+        const pointee_llvm_ty = try self.dg.llvmType(pointee_type);
+        const target = self.dg.module.getTarget();
+        const alloca_inst = self.buildAlloca(pointee_llvm_ty);
+        alloca_inst.setAlignment(ptr_ty.ptrAlignment(target));
+        return alloca_inst;
    }

    /// Use this instead of builder.buildAlloca, because this function makes sure to
@ -2200,8 +2239,11 @@ pub const FuncGen = struct {
    }

    fn load(self: *FuncGen, ptr: *const llvm.Value, ptr_ty: Type) *const llvm.Value {
-        _ = ptr_ty; // TODO set volatile and alignment on this load properly
-        return self.builder.buildLoad(ptr, "");
+        const llvm_inst = self.builder.buildLoad(ptr, "");
+        const target = self.dg.module.getTarget();
+        llvm_inst.setAlignment(ptr_ty.ptrAlignment(target));
+        llvm_inst.setVolatile(llvm.Bool.fromBool(ptr_ty.isVolatilePtr()));
+        return llvm_inst;
    }

    fn store(
@ -2210,8 +2252,11 @@ pub const FuncGen = struct {
        ptr_ty: Type,
        elem: *const llvm.Value,
    ) *const llvm.Value {
-        _ = ptr_ty; // TODO set volatile and alignment on this store properly
-        return self.builder.buildStore(elem, ptr);
+        const llvm_inst = self.builder.buildStore(elem, ptr);
+        const target = self.dg.module.getTarget();
+        llvm_inst.setAlignment(ptr_ty.ptrAlignment(target));
+        llvm_inst.setVolatile(llvm.Bool.fromBool(ptr_ty.isVolatilePtr()));
+        return llvm_inst;
    }
 };

--- a/src/codegen/llvm/bindings.zig
+++ b/src/codegen/llvm/bindings.zig
@ -85,6 +85,9 @@ pub const Value = opaque {
    pub const addAttributeAtIndex = LLVMAddAttributeAtIndex;
    extern fn LLVMAddAttributeAtIndex(*const Value, Idx: AttributeIndex, A: *const Attribute) void;

+    pub const removeEnumAttributeAtIndex = LLVMRemoveEnumAttributeAtIndex;
+    extern fn LLVMRemoveEnumAttributeAtIndex(F: *const Value, Idx: AttributeIndex, KindID: c_uint) void;
+
    pub const getFirstBasicBlock = LLVMGetFirstBasicBlock;
    extern fn LLVMGetFirstBasicBlock(Fn: *const Value) ?*const BasicBlock;

@ -136,6 +139,12 @@ pub const Value = opaque {

    pub const setOrdering = LLVMSetOrdering;
    extern fn LLVMSetOrdering(MemoryAccessInst: *const Value, Ordering: AtomicOrdering) void;
+
+    pub const setVolatile = LLVMSetVolatile;
+    extern fn LLVMSetVolatile(MemoryAccessInst: *const Value, IsVolatile: Bool) void;
+
+    pub const setAlignment = LLVMSetAlignment;
+    extern fn LLVMSetAlignment(V: *const Value, Bytes: c_uint) void;
 };

 pub const Type = opaque {
--- a/src/type.zig
+++ b/src/type.zig
@ -1583,7 +1583,11 @@ pub const Type = extern union {

            .int_signed, .int_unsigned => {
                const bits: u16 = self.cast(Payload.Bits).?.data;
-                return std.math.ceilPowerOfTwoPromote(u16, (bits + 7) / 8);
+                if (bits <= 8) return 1;
+                if (bits <= 16) return 2;
+                if (bits <= 32) return 4;
+                if (bits <= 64) return 8;
+                return 16;
            },

            .optional => {
--- a/test/behavior/atomics.zig
+++ b/test/behavior/atomics.zig
@ -81,3 +81,33 @@ test "cmpxchg with ignored result" {

    try expect(5678 == x);
 }
+
+test "128-bit cmpxchg" {
+    try test_u128_cmpxchg();
+    comptime try test_u128_cmpxchg();
+}
+
+fn test_u128_cmpxchg() !void {
+    if (builtin.zig_is_stage2) {
+        if (builtin.stage2_arch != .x86_64) return error.SkipZigTest;
+        if (!builtin.stage2_x86_cx16) return error.SkipZigTest;
+    } else {
+        if (builtin.cpu.arch != .x86_64) return error.SkipZigTest;
+        if (comptime !std.Target.x86.featureSetHas(builtin.cpu.features, .cx16)) return error.SkipZigTest;
+    }
+
+    var x: u128 = 1234;
+    if (@cmpxchgWeak(u128, &x, 99, 5678, .SeqCst, .SeqCst)) |x1| {
+        try expect(x1 == 1234);
+    } else {
+        @panic("cmpxchg should have failed");
+    }
+
+    while (@cmpxchgWeak(u128, &x, 1234, 5678, .SeqCst, .SeqCst)) |x1| {
+        try expect(x1 == 1234);
+    }
+    try expect(x == 5678);
+
+    try expect(@cmpxchgStrong(u128, &x, 5678, 42, .SeqCst, .SeqCst) == null);
+    try expect(x == 42);
+}
--- a/test/behavior/atomics_stage1.zig
+++ b/test/behavior/atomics_stage1.zig
@ -3,31 +3,6 @@ const expect = std.testing.expect;
 const expectEqual = std.testing.expectEqual;
 const builtin = @import("builtin");

-test "128-bit cmpxchg" {
-    try test_u128_cmpxchg();
-    comptime try test_u128_cmpxchg();
-}
-
-fn test_u128_cmpxchg() !void {
-    if (std.Target.current.cpu.arch != .x86_64) return error.SkipZigTest;
-    if (comptime !std.Target.x86.featureSetHas(std.Target.current.cpu.features, .cx16)) return error.SkipZigTest;
-
-    var x: u128 = 1234;
-    if (@cmpxchgWeak(u128, &x, 99, 5678, .SeqCst, .SeqCst)) |x1| {
-        try expect(x1 == 1234);
-    } else {
-        @panic("cmpxchg should have failed");
-    }
-
-    while (@cmpxchgWeak(u128, &x, 1234, 5678, .SeqCst, .SeqCst)) |x1| {
-        try expect(x1 == 1234);
-    }
-    try expect(x == 5678);
-
-    try expect(@cmpxchgStrong(u128, &x, 5678, 42, .SeqCst, .SeqCst) == null);
-    try expect(x == 42);
-}
-
 var a_global_variable = @as(u32, 1234);

 test "cmpxchg on a global variable" {
--- a/test/cases.zig
+++ b/test/cases.zig
@ -26,7 +26,7 @@ pub fn addCases(ctx: *TestContext) !void {
        var case = ctx.exe("hello world with updates", linux_x64);

        case.addError("", &[_][]const u8{
-            ":90:9: error: struct 'tmp.tmp' has no member named 'main'",
+            ":95:9: error: struct 'tmp.tmp' has no member named 'main'",
        });

        // Incorrect return type
--- a/test/stage2/darwin.zig
+++ b/test/stage2/darwin.zig
@ -12,9 +12,9 @@ pub fn addCases(ctx: *TestContext) !void {
            .os_tag = .macos,
        };
        {
-            var case = ctx.exe("hello world with updates", target);
+            var case = ctx.exe("darwin hello world with updates", target);
            case.addError("", &[_][]const u8{
-                ":90:9: error: struct 'tmp.tmp' has no member named 'main'",
+                ":95:9: error: struct 'tmp.tmp' has no member named 'main'",
            });

            // Incorrect return type