From 6e6ae8886e6885a648918cdb006f899b40b378aa Mon Sep 17 00:00:00 2001
From: Cody Tapscott
Date: Tue, 11 Oct 2022 11:04:29 -0700
Subject: [PATCH] stage1: Add softfloat support for `@reduce`

When the vector's element type is a float the target cannot handle with the
LLVM reduce intrinsics (f80 without target f80 support, f128 when long double
is not f128, f16 on non-ARM targets), lower `@reduce` to a scalar loop that
calls a two-operand library routine instead: `fmin`/`fmax` (with the libc
float prefix/suffix for the type) for min/max, and `__add?f3`/`__mul?f3` for
add/mul.

The loop accumulator starts at the neutral element of the operation: NaN for
min/max, -0.0 for add, and 1.0 for mul.

`init_const_float` learns to build f80 and f128 constants so that these
initial values can be created.
---
Review notes (not part of the commit message):

- The soft-float add reduction now starts from -0.0 instead of 0.0. -0.0 is
  the additive identity; starting from +0.0 turns a sum of all -0.0 elements
  into +0.0, which disagrees with the intrinsic path (it already uses -0.0).
- This patch was restored from a whitespace-collapsed copy. Line breaks and
  indentation of unchanged context lines are a best-effort reconstruction;
  if it does not apply cleanly, regenerate it with `git format-patch`.
- New-side hunk ranges and the diffstat were updated for the added comments.

 src/stage1/analyze.cpp   |   6 +-
 src/stage1/codegen.cpp   | 136 ++++++++++++++++++++++++++++++++++-----
 src/stage1/softfloat.hpp |  14 ++++
 3 files changed, 137 insertions(+), 19 deletions(-)

diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp
index f0cad841be..2d0624a7c7 100644
--- a/src/stage1/analyze.cpp
+++ b/src/stage1/analyze.cpp
@@ -6358,9 +6358,11 @@ void init_const_float(ZigValue *const_val, ZigType *type, double value) {
                 const_val->data.x_f64 = value;
                 break;
             case 80:
+                zig_double_to_extF80M(value, &const_val->data.x_f80);
+                break;
             case 128:
-                // if we need this, we should add a function that accepts a float128_t param
-                zig_unreachable();
+                zig_double_to_f128M(value, &const_val->data.x_f128);
+                break;
             default:
                 zig_unreachable();
         }
diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp
index 18e30d416f..5546605b88 100644
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@@ -6481,6 +6481,62 @@ static LLVMValueRef ir_render_cmpxchg(CodeGen *g, Stage1Air *executable, Stage1A
     return result_loc;
 }
 
+// Renders a vector reduction as a scalar loop of calls to the two-operand function `llvm_fn`:
+//
+//     accum = accum_init;
+//     for (i = 0; i < vector_len; i += 1) accum = llvm_fn(accum, operand_vector[i]);
+//
+// `accum_ty` is the type handed to build_alloca for the accumulator slot. Returns the final
+// accumulator, loaded in the "AfterReduce" block where the builder is left positioned.
+static LLVMValueRef ir_render_reduced_call(CodeGen *g, LLVMValueRef llvm_fn, LLVMValueRef operand_vector, size_t vector_len, LLVMValueRef accum_init, ZigType *accum_ty) {
+    LLVMTypeRef llvm_usize_ty = g->builtin_types.entry_usize->llvm_type;
+    LLVMValueRef llvm_vector_len = LLVMConstInt(llvm_usize_ty, vector_len, false);
+    LLVMTypeRef llvm_result_ty = LLVMTypeOf(accum_init);
+
+    // Allocate and initialize our mutable variables
+    LLVMValueRef i_ptr = build_alloca(g, g->builtin_types.entry_usize, "i", 0);
+    LLVMBuildStore(g->builder, LLVMConstInt(llvm_usize_ty, 0, false), i_ptr);
+    LLVMValueRef accum_ptr = build_alloca(g, accum_ty, "accum", 0);
+    LLVMBuildStore(g->builder, accum_init, accum_ptr);
+
+    // Setup the loop
+    LLVMBasicBlockRef loop = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoop");
+    LLVMBasicBlockRef loop_exit = LLVMAppendBasicBlock(g->cur_fn_val, "AfterReduce");
+    LLVMBuildBr(g->builder, loop);
+    {
+        LLVMPositionBuilderAtEnd(g->builder, loop);
+
+        // while (i < vec.len)
+        LLVMValueRef i = LLVMBuildLoad2(g->builder, llvm_usize_ty, i_ptr, "");
+        LLVMValueRef cond = LLVMBuildICmp(g->builder, LLVMIntULT, i, llvm_vector_len, "");
+        LLVMBasicBlockRef loop_then = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoopThen");
+
+        LLVMBuildCondBr(g->builder, cond, loop_then, loop_exit);
+
+        {
+            LLVMPositionBuilderAtEnd(g->builder, loop_then);
+
+            // accum = f(accum, vec[i]);
+            LLVMValueRef accum = LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, "");
+            LLVMValueRef element = LLVMBuildExtractElement(g->builder, operand_vector, i, "");
+            LLVMValueRef params[] {
+                accum,
+                element
+            };
+            LLVMValueRef new_accum = LLVMBuildCall2(g->builder, LLVMGlobalGetValueType(llvm_fn), llvm_fn, params, 2, "");
+            LLVMBuildStore(g->builder, new_accum, accum_ptr);
+
+            // i += 1
+            LLVMValueRef new_i = LLVMBuildAdd(g->builder, i, LLVMConstInt(llvm_usize_ty, 1, false), "");
+            LLVMBuildStore(g->builder, new_i, i_ptr);
+            LLVMBuildBr(g->builder, loop);
+        }
+    }
+
+    LLVMPositionBuilderAtEnd(g->builder, loop_exit);
+    return LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, "");
+}
+
 static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1AirInstReduce *instruction) {
     LLVMValueRef value = ir_llvm_value(g, instruction->value);
 
@@ -6488,61 +6544,107 @@ static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1Ai
     assert(value_type->id == ZigTypeIdVector);
     ZigType *scalar_type = value_type->data.vector.elem_type;
 
+    // The LLVM reduce intrinsics are skipped for the f80/f128/f16 cases tested below;
+    // for those the reduction is lowered to a loop of library calls instead.
+    bool float_intrinsics_allowed = true;
+    const char *compiler_rt_type_abbrev = nullptr;
+    const char *math_float_prefix = nullptr;
+    const char *math_float_suffix = nullptr;
+    if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+        (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
+        (scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
+        float_intrinsics_allowed = false;
+        compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(scalar_type);
+        math_float_prefix = libc_float_prefix(g, scalar_type);
+        math_float_suffix = libc_float_suffix(g, scalar_type);
+    }
+
     ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, &instruction->base));
-    LLVMValueRef result_val;
+    char fn_name[64];
+    ZigValue *init_value = nullptr;
     switch (instruction->op) {
         case ReduceOp_and:
             assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
-            result_val = ZigLLVMBuildAndReduce(g->builder, value);
+            return ZigLLVMBuildAndReduce(g->builder, value);
             break;
         case ReduceOp_or:
             assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
-            result_val = ZigLLVMBuildOrReduce(g->builder, value);
+            return ZigLLVMBuildOrReduce(g->builder, value);
             break;
         case ReduceOp_xor:
             assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
-            result_val = ZigLLVMBuildXorReduce(g->builder, value);
+            return ZigLLVMBuildXorReduce(g->builder, value);
             break;
         case ReduceOp_min: {
             if (scalar_type->id == ZigTypeIdInt) {
                 const bool is_signed = scalar_type->data.integral.is_signed;
-                result_val = ZigLLVMBuildIntMinReduce(g->builder, value, is_signed);
+                return ZigLLVMBuildIntMinReduce(g->builder, value, is_signed);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                result_val = ZigLLVMBuildFPMinReduce(g->builder, value);
+                if (float_intrinsics_allowed) {
+                    return ZigLLVMBuildFPMinReduce(g->builder, value);
+                } else {
+                    // fmin returns its other operand when one is NaN, so NaN is a neutral start.
+                    snprintf(fn_name, sizeof(fn_name), "%sfmin%s", math_float_prefix, math_float_suffix);
+                    init_value = create_const_float(g, scalar_type, NAN);
+                }
             } else zig_unreachable();
         } break;
         case ReduceOp_max: {
             if (scalar_type->id == ZigTypeIdInt) {
                 const bool is_signed = scalar_type->data.integral.is_signed;
-                result_val = ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed);
+                return ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                result_val = ZigLLVMBuildFPMaxReduce(g->builder, value);
+                if (float_intrinsics_allowed) {
+                    return ZigLLVMBuildFPMaxReduce(g->builder, value);
+                } else {
+                    // fmax returns its other operand when one is NaN, so NaN is a neutral start.
+                    snprintf(fn_name, sizeof(fn_name), "%sfmax%s", math_float_prefix, math_float_suffix);
+                    init_value = create_const_float(g, scalar_type, NAN);
+                }
             } else zig_unreachable();
         } break;
         case ReduceOp_add: {
             if (scalar_type->id == ZigTypeIdInt) {
-                result_val = ZigLLVMBuildAddReduce(g->builder, value);
+                return ZigLLVMBuildAddReduce(g->builder, value);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                LLVMValueRef neutral_value = LLVMConstReal(
-                        get_llvm_type(g, scalar_type), -0.0);
-                result_val = ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value);
+                if (float_intrinsics_allowed) {
+                    LLVMValueRef neutral_value = LLVMConstReal(
+                            get_llvm_type(g, scalar_type), -0.0);
+                    return ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value);
+                } else {
+                    snprintf(fn_name, sizeof(fn_name), "__add%sf3", compiler_rt_type_abbrev);
+                    // -0.0 is the additive identity: starting from +0.0 would turn a sum of
+                    // all -0.0 elements into +0.0. Matches the intrinsic's neutral value above.
+                    init_value = create_const_float(g, scalar_type, -0.0);
+                }
             } else
                 zig_unreachable();
         } break;
         case ReduceOp_mul: {
             if (scalar_type->id == ZigTypeIdInt) {
-                result_val = ZigLLVMBuildMulReduce(g->builder, value);
+                return ZigLLVMBuildMulReduce(g->builder, value);
             } else if (scalar_type->id == ZigTypeIdFloat) {
-                LLVMValueRef neutral_value = LLVMConstReal(
-                        get_llvm_type(g, scalar_type), 1.0);
-                result_val = ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value);
+                if (float_intrinsics_allowed) {
+                    LLVMValueRef neutral_value = LLVMConstReal(
+                            get_llvm_type(g, scalar_type), 1.0);
+                    return ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value);
+                } else {
+                    snprintf(fn_name, sizeof(fn_name), "__mul%sf3", compiler_rt_type_abbrev);
+                    init_value = create_const_float(g, scalar_type, 1.0);
+                }
             } else
                 zig_unreachable();
         } break;
         default:
             zig_unreachable();
     }
-    return result_val;
+
+    // Only the soft-float cases reach this point; every other case returned above.
+    LLVMValueRef llvm_init_value = gen_const_val(g, init_value, "");
+    uint32_t vector_len = value_type->data.vector.len;
+    LLVMTypeRef llvm_scalar_type = get_llvm_type(g, scalar_type);
+    const LLVMValueRef llvm_fn = get_soft_float_fn(g, fn_name, 2, llvm_scalar_type, llvm_scalar_type);
+    return ir_render_reduced_call(g, llvm_fn, value, vector_len, llvm_init_value, scalar_type);
 }
 
 static LLVMValueRef ir_render_fence(CodeGen *g, Stage1Air *executable, Stage1AirInstFence *instruction) {
diff --git a/src/stage1/softfloat.hpp b/src/stage1/softfloat.hpp
index a0d270d55f..b9d886d311 100644
--- a/src/stage1/softfloat.hpp
+++ b/src/stage1/softfloat.hpp
@@ -21,6 +21,20 @@ static inline float16_t zig_double_to_f16(double x) {
     return f64_to_f16(y);
 }
 
+static inline void zig_double_to_extF80M(double x, extFloat80_t *result) {
+    float64_t y;
+    static_assert(sizeof(x) == sizeof(y), "");
+    memcpy(&y, &x, sizeof(x));
+    f64_to_extF80M(y, result);
+}
+
+static inline void zig_double_to_f128M(double x, float128_t *result) {
+    float64_t y;
+    static_assert(sizeof(x) == sizeof(y), "");
+    memcpy(&y, &x, sizeof(x));
+    f64_to_f128M(y, result);
+}
+
 // Return value is safe to coerce to float even when |x| is NaN or Infinity.
 static inline double zig_f16_to_double(float16_t x) {