From 6e6ae8886e6885a648918cdb006f899b40b378aa Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Tue, 11 Oct 2022 11:04:29 -0700
Subject: [PATCH] stage1: Add softfloat support for `@reduce`

---
 src/stage1/analyze.cpp   |   6 +-
 src/stage1/codegen.cpp   | 122 +++++++++++++++++++++++++++++++++------
 src/stage1/softfloat.hpp |  14 +++++
 3 files changed, 123 insertions(+), 19 deletions(-)

diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp
index f0cad841be..2d0624a7c7 100644
--- a/src/stage1/analyze.cpp
+++ b/src/stage1/analyze.cpp
@@ -6358,9 +6358,11 @@ void init_const_float(ZigValue *const_val, ZigType *type, double value) {
                 const_val->data.x_f64 = value;
                 break;
             case 80:
+                zig_double_to_extF80M(value, &const_val->data.x_f80);
+                break;
             case 128:
-                // if we need this, we should add a function that accepts a float128_t param
-                zig_unreachable();
+                zig_double_to_f128M(value, &const_val->data.x_f128);
+                break;
             default:
                 zig_unreachable();
         }
diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp
index 18e30d416f..5546605b88 100644
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@@ -6481,6 +6481,55 @@ static LLVMValueRef ir_render_cmpxchg(CodeGen *g, Stage1Air *executable, Stage1A
     return result_loc;
 }
 
+static LLVMValueRef ir_render_reduced_call(CodeGen *g, LLVMValueRef llvm_fn, LLVMValueRef operand_vector, size_t vector_len, LLVMValueRef accum_init, ZigType *accum_ty) {
+    LLVMTypeRef llvm_usize_ty = g->builtin_types.entry_usize->llvm_type;
+    LLVMValueRef llvm_vector_len = LLVMConstInt(llvm_usize_ty, vector_len, false);
+    LLVMTypeRef llvm_result_ty = LLVMTypeOf(accum_init);
+
+    // Allocate and initialize our mutable variables
+    LLVMValueRef i_ptr = build_alloca(g, g->builtin_types.entry_usize, "i", 0);
+    LLVMBuildStore(g->builder, LLVMConstInt(llvm_usize_ty, 0, false), i_ptr);
+    LLVMValueRef accum_ptr = build_alloca(g, accum_ty, "accum", 0);
+    LLVMBuildStore(g->builder, accum_init, accum_ptr);
+
+    // Setup the loop
+    LLVMBasicBlockRef loop = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoop");
+    LLVMBasicBlockRef loop_exit = LLVMAppendBasicBlock(g->cur_fn_val, "AfterReduce");
+    LLVMBuildBr(g->builder, loop);
+    {
+        LLVMPositionBuilderAtEnd(g->builder, loop);
+
+        // while (i < vec.len)
+        LLVMValueRef i = LLVMBuildLoad2(g->builder, llvm_usize_ty, i_ptr, "");
+        LLVMValueRef cond = LLVMBuildICmp(g->builder, LLVMIntULT, i, llvm_vector_len, "");
+        LLVMBasicBlockRef loop_then = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoopThen");
+
+        LLVMBuildCondBr(g->builder, cond, loop_then, loop_exit);
+
+        {
+            LLVMPositionBuilderAtEnd(g->builder, loop_then);
+
+            // accum = f(accum, vec[i]);
+            LLVMValueRef accum = LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, "");
+            LLVMValueRef element = LLVMBuildExtractElement(g->builder, operand_vector, i, "");
+            LLVMValueRef params[] {
+                accum,
+                element
+            };
+            LLVMValueRef new_accum = LLVMBuildCall2(g->builder, LLVMGlobalGetValueType(llvm_fn), llvm_fn, params, 2, "");
+            LLVMBuildStore(g->builder, new_accum, accum_ptr);
+
+            // i += 1
+            LLVMValueRef new_i = LLVMBuildAdd(g->builder, i, LLVMConstInt(llvm_usize_ty, 1, false), "");
+            LLVMBuildStore(g->builder, new_i, i_ptr);
+            LLVMBuildBr(g->builder, loop);
+        }
+    }
+
+    LLVMPositionBuilderAtEnd(g->builder, loop_exit);
+    return LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, "");
+}
+
 static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1AirInstReduce *instruction) {
     LLVMValueRef value = ir_llvm_value(g, instruction->value);
 
@@ -6488,61 +6537,100 @@ static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1Ai
     assert(value_type->id == ZigTypeIdVector);
     ZigType *scalar_type = value_type->data.vector.elem_type;
 
+    bool float_intrinsics_allowed = true;
+    const char *compiler_rt_type_abbrev = nullptr;
+    const char *math_float_prefix = nullptr;
+    const char *math_float_suffix = nullptr;
+    if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+        (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
+        (scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
+        float_intrinsics_allowed = false;
+        compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(scalar_type);
+        math_float_prefix = libc_float_prefix(g, scalar_type);
+        math_float_suffix = libc_float_suffix(g, scalar_type);
+    }
+
     ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, &instruction->base));
 
-    LLVMValueRef result_val;
+    char fn_name[64];
+    ZigValue *init_value = nullptr;
     switch (instruction->op) {
         case ReduceOp_and:
             assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
-            result_val = ZigLLVMBuildAndReduce(g->builder, value);
+            return ZigLLVMBuildAndReduce(g->builder, value);
             break;
         case ReduceOp_or:
             assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
-            result_val = ZigLLVMBuildOrReduce(g->builder, value);
+            return ZigLLVMBuildOrReduce(g->builder, value);
             break;
         case ReduceOp_xor:
             assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
-            result_val = ZigLLVMBuildXorReduce(g->builder, value);
+            return ZigLLVMBuildXorReduce(g->builder, value);
             break;
         case ReduceOp_min: {
             if (scalar_type->id == ZigTypeIdInt) {
                 const bool is_signed = scalar_type->data.integral.is_signed;
-                result_val = ZigLLVMBuildIntMinReduce(g->builder, value, is_signed);
+                return ZigLLVMBuildIntMinReduce(g->builder, value, is_signed);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                result_val = ZigLLVMBuildFPMinReduce(g->builder, value);
+                if (float_intrinsics_allowed) {
+                    return ZigLLVMBuildFPMinReduce(g->builder, value);
+                } else {
+                    snprintf(fn_name, sizeof(fn_name), "%sfmin%s", math_float_prefix, math_float_suffix);
+                    init_value = create_const_float(g, scalar_type, NAN);
+                }
             } else zig_unreachable();
         } break;
         case ReduceOp_max: {
             if (scalar_type->id == ZigTypeIdInt) {
                 const bool is_signed = scalar_type->data.integral.is_signed;
-                result_val = ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed);
+                return ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                result_val = ZigLLVMBuildFPMaxReduce(g->builder, value);
+                if (float_intrinsics_allowed) {
+                    return ZigLLVMBuildFPMaxReduce(g->builder, value);
+                } else {
+                    snprintf(fn_name, sizeof(fn_name), "%sfmax%s", math_float_prefix, math_float_suffix);
+                    init_value = create_const_float(g, scalar_type, NAN);
+                }
             } else zig_unreachable();
         } break;
         case ReduceOp_add: {
             if (scalar_type->id == ZigTypeIdInt) {
-                result_val = ZigLLVMBuildAddReduce(g->builder, value);
+                return ZigLLVMBuildAddReduce(g->builder, value);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                LLVMValueRef neutral_value = LLVMConstReal(
-                        get_llvm_type(g, scalar_type), -0.0);
-                result_val = ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value);
+                if (float_intrinsics_allowed) {
+                    LLVMValueRef neutral_value = LLVMConstReal(
+                            get_llvm_type(g, scalar_type), -0.0);
+                    return ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value);
+                } else {
+                    snprintf(fn_name, sizeof(fn_name), "__add%sf3", compiler_rt_type_abbrev);
+                    init_value = create_const_float(g, scalar_type, 0.0);
+                }
             } else zig_unreachable();
         } break;
         case ReduceOp_mul: {
             if (scalar_type->id == ZigTypeIdInt) {
-                result_val = ZigLLVMBuildMulReduce(g->builder, value);
+                return ZigLLVMBuildMulReduce(g->builder, value);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                LLVMValueRef neutral_value = LLVMConstReal(
-                        get_llvm_type(g, scalar_type), 1.0);
-                result_val = ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value);
+                if (float_intrinsics_allowed) {
+                    LLVMValueRef neutral_value = LLVMConstReal(
+                            get_llvm_type(g, scalar_type), 1.0);
+                    return ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value);
+                } else {
+                    snprintf(fn_name, sizeof(fn_name), "__mul%sf3", compiler_rt_type_abbrev);
+                    init_value = create_const_float(g, scalar_type, 1.0);
+                }
             } else zig_unreachable();
         } break;
         default:
             zig_unreachable();
     }
 
-    return result_val;
+
+    LLVMValueRef llvm_init_value = gen_const_val(g, init_value, "");
+    uint32_t vector_len = value_type->data.vector.len;
+    LLVMTypeRef llvm_scalar_type = get_llvm_type(g, scalar_type);
+    const LLVMValueRef llvm_fn = get_soft_float_fn(g, fn_name, 2, llvm_scalar_type, llvm_scalar_type);
+    return ir_render_reduced_call(g, llvm_fn, value, vector_len, llvm_init_value, scalar_type);
 }
 
 static LLVMValueRef ir_render_fence(CodeGen *g, Stage1Air *executable, Stage1AirInstFence *instruction) {
diff --git a/src/stage1/softfloat.hpp b/src/stage1/softfloat.hpp
index a0d270d55f..b9d886d311 100644
--- a/src/stage1/softfloat.hpp
+++ b/src/stage1/softfloat.hpp
@@ -21,6 +21,20 @@ static inline float16_t zig_double_to_f16(double x) {
     return f64_to_f16(y);
 }
 
+static inline void zig_double_to_extF80M(double x, extFloat80_t *result) {
+    float64_t y;
+    static_assert(sizeof(x) == sizeof(y), "");
+    memcpy(&y, &x, sizeof(x));
+    f64_to_extF80M(y, result);
+}
+
+static inline void zig_double_to_f128M(double x, float128_t *result) {
+    float64_t y;
+    static_assert(sizeof(x) == sizeof(y), "");
+    memcpy(&y, &x, sizeof(x));
+    f64_to_f128M(y, result);
+}
+
 
 // Return value is safe to coerce to float even when |x| is NaN or Infinity.
 static inline double zig_f16_to_double(float16_t x) {