stage1: Lower libcalls on Windows x86-64 correctly

This change is the Zig counterpart to https://reviews.llvm.org/D110413 Since we lower some libcalls directly (just like clang does), we need to make sure that the ABI we call with matches the ABI of the compiler-rt we are providing (and also the ABI expected by LLVM). While I was at it, I noticed some flawed vector handling in the binary soft float ops in stage 1, so I shored up the logic a bit and expanded an existing test to cover the missing functionality.
2026-02-20 16:24:51 +00:00 · 2022-07-10 16:46:33 -07:00 · 2022-07-10 16:46:33 -07:00 · d182e2ebda
commit d182e2ebda
parent bb8971150c
2 changed files with 52 additions and 70 deletions
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@ -3371,14 +3371,12 @@ static LLVMValueRef add_icmp(CodeGen *g, LLVMValueRef val, Icmp kind) {
 }

 static LLVMValueRef gen_soft_int_to_float_op(CodeGen *g, LLVMValueRef value_ref, ZigType *operand_type, ZigType *result_type) {
-    uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
-
    // Handle integers of non-pot bitsize by widening them.
    const size_t bitsize = operand_type->data.integral.bit_count;
    const bool is_signed = operand_type->data.integral.is_signed;
    if (bitsize < 32 || !is_power_of_2(bitsize)) {
        const size_t wider_bitsize = bitsize < 32 ? 32 : round_to_next_power_of_2(bitsize);
-        ZigType *const wider_type = get_int_type(g, is_signed, wider_bitsize);
+        ZigType *wider_type = get_int_type(g, is_signed, wider_bitsize);
        value_ref = gen_widen_or_shorten(g, false, operand_type, wider_type, value_ref);
        operand_type = wider_type;
    }
@ -3395,35 +3393,22 @@ static LLVMValueRef gen_soft_int_to_float_op(CodeGen *g, LLVMValueRef value_ref,
    }

    int param_count = 1;
-    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, result_type->llvm_type);
-
-    LLVMValueRef result;
-    if (vector_len == 0) {
-        LLVMValueRef params[1] = {value_ref};
-        result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
+    LLVMValueRef func_ref;
+    if ((operand_type->data.integral.bit_count == 128) && (g->zig_target->os == OsWindows) && (g->zig_target->arch == ZigLLVM_x86_64)) {
+        // On Windows x86-64, "ti" functions must use Vector(2, u64) instead of the standard i128 calling
+        // convention to adhere to the ABI that LLVM expects compiler-rt to have.
+        LLVMTypeRef v2i64 = LLVMVectorType(LLVMInt64Type(), 2);
+        value_ref = LLVMBuildBitCast(g->builder, value_ref, v2i64, "");
+        func_ref = get_soft_float_fn(g, fn_name, param_count, v2i64, result_type->llvm_type);
    } else {
-        ZigType *alloca_ty = operand_type;
-        result = build_alloca(g, alloca_ty, "", 0);
-
-        LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
-        for (uint32_t i = 0; i < vector_len; i++) {
-            LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
-            LLVMValueRef params[1] = {
-                LLVMBuildExtractElement(g->builder, value_ref, index_value, ""),
-            };
-            LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
-            LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
-                call_result, index_value, "");
-        }
-
-        result = LLVMBuildLoad(g->builder, result, "");
+        func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, result_type->llvm_type);
    }
-    return result;
+
+    LLVMValueRef params[1] = {value_ref};
+    return LLVMBuildCall(g->builder, func_ref, params, param_count, "");
 }

 static LLVMValueRef gen_soft_float_to_int_op(CodeGen *g, LLVMValueRef value_ref, ZigType *operand_type, ZigType *result_type) {
-    uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
-
    // Handle integers of non-pot bitsize by truncating a sufficiently wide pot integer
    const size_t bitsize = result_type->data.integral.bit_count;
    const bool is_signed = result_type->data.integral.is_signed;
@ -3445,46 +3430,41 @@ static LLVMValueRef gen_soft_float_to_int_op(CodeGen *g, LLVMValueRef value_ref,
    }

    int param_count = 1;
-    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, wider_type->llvm_type);
-
-    LLVMValueRef result;
-    if (vector_len == 0) {
-        LLVMValueRef params[1] = {value_ref};
-        result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
+    LLVMValueRef func_ref;
+    if ((wider_type->data.integral.bit_count == 128) && (g->zig_target->os == OsWindows) && (g->zig_target->arch == ZigLLVM_x86_64)) {
+        // On Windows x86-64, "ti" functions must use Vector(2, u64) instead of the standard i128 calling
+        // convention to adhere to the ABI that LLVM expects compiler-rt to have.
+        LLVMTypeRef v2i64 = LLVMVectorType(LLVMInt64Type(), 2);
+        func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, v2i64);
    } else {
-        ZigType *alloca_ty = operand_type;
-        result = build_alloca(g, alloca_ty, "", 0);
+        func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, wider_type->llvm_type);
+    }

-        LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
-        for (uint32_t i = 0; i < vector_len; i++) {
-            LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
-            LLVMValueRef params[1] = {
-                LLVMBuildExtractElement(g->builder, value_ref, index_value, ""),
-            };
-            LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
-            LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
-                call_result, index_value, "");
-        }
+    LLVMValueRef params[1] = {value_ref};
+    LLVMValueRef result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");

-        result = LLVMBuildLoad(g->builder, result, "");
+    if ((wider_type->data.integral.bit_count == 128) && (g->zig_target->os == OsWindows) && (g->zig_target->arch == ZigLLVM_x86_64)) {
+        result = LLVMBuildBitCast(g->builder, result, wider_type->llvm_type, "");
    }

    // Handle integers of non-pot bitsize by shortening them on the output
    if (result_type != wider_type) {
-        return gen_widen_or_shorten(g, false, wider_type, result_type, result);
+        result = gen_widen_or_shorten(g, false, wider_type, result_type, result); 
    }
+
    return result;
 }

 static LLVMValueRef gen_soft_float_bin_op(CodeGen *g, LLVMValueRef op1_value, LLVMValueRef op2_value, ZigType *operand_type, IrBinOp op_id) {
    uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;

-    LLVMTypeRef return_type = operand_type->llvm_type;
    int param_count = 2;

-    const char *compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(operand_type);
-    const char *math_float_prefix = libc_float_prefix(g, operand_type);
-    const char *math_float_suffix = libc_float_suffix(g, operand_type);
+    ZigType *operand_scalar_type = (operand_type->id == ZigTypeIdVector) ? operand_type->data.vector.elem_type : operand_type;
+    LLVMTypeRef return_scalar_type = operand_scalar_type->llvm_type;
+    const char *compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(operand_scalar_type);
+    const char *math_float_prefix = libc_float_prefix(g, operand_scalar_type);
+    const char *math_float_suffix = libc_float_suffix(g, operand_scalar_type);

    char fn_name[64];
    Icmp res_icmp = NONE;
@ -3511,32 +3491,32 @@ static LLVMValueRef gen_soft_float_bin_op(CodeGen *g, LLVMValueRef op1_value, LL
        case IrBinOpShlSat:
            zig_unreachable();
        case IrBinOpCmpEq:
-            return_type = g->builtin_types.entry_i32->llvm_type;
+            return_scalar_type = g->builtin_types.entry_i32->llvm_type;
            snprintf(fn_name, sizeof(fn_name), "__eq%sf2", compiler_rt_type_abbrev);
            res_icmp = EQ_ZERO;
            break;
        case IrBinOpCmpNotEq:
-            return_type = g->builtin_types.entry_i32->llvm_type;
+            return_scalar_type = g->builtin_types.entry_i32->llvm_type;
            snprintf(fn_name, sizeof(fn_name), "__ne%sf2", compiler_rt_type_abbrev);
            res_icmp = NE_ZERO;
            break;
        case IrBinOpCmpLessOrEq:
-            return_type = g->builtin_types.entry_i32->llvm_type;
+            return_scalar_type = g->builtin_types.entry_i32->llvm_type;
            snprintf(fn_name, sizeof(fn_name), "__le%sf2", compiler_rt_type_abbrev);
            res_icmp = LE_ZERO;
            break;
        case IrBinOpCmpLessThan:
-            return_type = g->builtin_types.entry_i32->llvm_type;
+            return_scalar_type = g->builtin_types.entry_i32->llvm_type;
            snprintf(fn_name, sizeof(fn_name), "__le%sf2", compiler_rt_type_abbrev);
            res_icmp = EQ_NEG;
            break;
        case IrBinOpCmpGreaterOrEq:
-            return_type = g->builtin_types.entry_i32->llvm_type;
+            return_scalar_type = g->builtin_types.entry_i32->llvm_type;
            snprintf(fn_name, sizeof(fn_name), "__ge%sf2", compiler_rt_type_abbrev);
            res_icmp = GE_ZERO;
            break;
        case IrBinOpCmpGreaterThan:
-            return_type = g->builtin_types.entry_i32->llvm_type;
+            return_scalar_type = g->builtin_types.entry_i32->llvm_type;
            snprintf(fn_name, sizeof(fn_name), "__ge%sf2", compiler_rt_type_abbrev);
            res_icmp = EQ_ONE;
            break;
@ -3569,7 +3549,7 @@ static LLVMValueRef gen_soft_float_bin_op(CodeGen *g, LLVMValueRef op1_value, LL
            zig_unreachable();
    }

-    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, return_type);
+    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, param_count, operand_scalar_type->llvm_type, return_scalar_type);

    LLVMValueRef result;
    if (vector_len == 0) {
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@ -101,18 +101,20 @@ test "vector float operators" {
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

-    const S = struct {
-        fn doTheTest() !void {
-            var v: @Vector(4, f32) = [4]f32{ 10, 20, 30, 40 };
-            var x: @Vector(4, f32) = [4]f32{ 1, 2, 3, 4 };
-            try expect(mem.eql(f32, &@as([4]f32, v + x), &[4]f32{ 11, 22, 33, 44 }));
-            try expect(mem.eql(f32, &@as([4]f32, v - x), &[4]f32{ 9, 18, 27, 36 }));
-            try expect(mem.eql(f32, &@as([4]f32, v * x), &[4]f32{ 10, 40, 90, 160 }));
-            try expect(mem.eql(f32, &@as([4]f32, -x), &[4]f32{ -1, -2, -3, -4 }));
-        }
-    };
-    try S.doTheTest();
-    comptime try S.doTheTest();
+    inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
+        const S = struct {
+            fn doTheTest() !void {
+                var v: @Vector(4, T) = [4]T{ 10, 20, 30, 40 };
+                var x: @Vector(4, T) = [4]T{ 1, 2, 3, 4 };
+                try expect(mem.eql(T, &@as([4]T, v + x), &[4]T{ 11, 22, 33, 44 }));
+                try expect(mem.eql(T, &@as([4]T, v - x), &[4]T{ 9, 18, 27, 36 }));
+                try expect(mem.eql(T, &@as([4]T, v * x), &[4]T{ 10, 40, 90, 160 }));
+                try expect(mem.eql(T, &@as([4]T, -x), &[4]T{ -1, -2, -3, -4 }));
+            }
+        };
+        try S.doTheTest();
+        comptime try S.doTheTest();
+    }
 }

 test "vector bit operators" {