diff --git a/src/Air.zig b/src/Air.zig index 722ea28305..99065f83fe 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -935,6 +935,17 @@ pub const Inst = struct { /// type is the vector element type. legalize_vec_elem_val, + /// A call to a compiler_rt routine. `Legalize` may emit this instruction if any soft-float + /// legalizations are enabled. + /// + /// Uses the `legalize_compiler_rt_call` union field. + /// + /// The name of the function symbol is given by `func.name(target)`. + /// The calling convention is given by `func.@"callconv"(target)`. + /// The return type (and hence the result type of this instruction) is `func.returnType()`. + /// The parameter types are the types of the arguments given in `Air.Call`. + legalize_compiler_rt_call, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1240,6 +1251,11 @@ pub const Inst = struct { ty: InternPool.Index, nav: InternPool.Nav.Index, }, + legalize_compiler_rt_call: struct { + func: CompilerRtFunc, + /// Index into `extra` to a payload of type `Call`. + payload: u32, + }, inferred_alloc_comptime: InferredAllocComptime, inferred_alloc: InferredAlloc, @@ -1756,6 +1772,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .work_group_id, => return .u32, + .legalize_compiler_rt_call => return datas[@intFromEnum(inst)].legalize_compiler_rt_call.func.returnType(), + .inferred_alloc => unreachable, .inferred_alloc_comptime => unreachable, } @@ -1879,6 +1897,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .int_from_float_safe, .int_from_float_optimized_safe, .legalize_vec_store_elem, + .legalize_compiler_rt_call, => true, .add, @@ -2192,3 +2211,338 @@ pub const CoveragePoint = enum(u1) { /// a source location used for coverage instrumentation. poi, }; + +pub const CompilerRtFunc = enum(u32) { + // zig fmt: off + + // float simple arithmetic + __addhf3, __addsf3, __adddf3, __addxf3, __addtf3, + __subhf3, __subsf3, __subdf3, __subxf3, __subtf3, + __mulhf3, __mulsf3, __muldf3, __mulxf3, __multf3, + __divhf3, __divsf3, __divdf3, __divxf3, __divtf3, + + // float minmax + __fminh, fminf, fmin, __fminx, fminq, + __fmaxh, fmaxf, fmax, __fmaxx, fmaxq, + + // float round + __ceilh, ceilf, ceil, __ceilx, ceilq, + __floorh, floorf, floor, __floorx, floorq, + __trunch, truncf, trunc, __truncx, truncq, + __roundh, roundf, round, __roundx, roundq, + + // float log + __logh, logf, log, __logx, logq, + __log2h, log2f, log2, __log2x, log2q, + __log10h, log10f, log10, __log10x, log10q, + + // float exp + __exph, expf, exp, __expx, expq, + __exp2h, exp2f, exp2, __exp2x, exp2q, + + // float trigonometry + __sinh, sinf, sin, __sinx, sinq, + __cosh, cosf, cos, __cosx, cosq, + __tanh, tanf, tan, __tanx, tanq, + + // float misc ops + __fabsh, fabsf, fabs, __fabsx, fabsq, + __sqrth, sqrtf, sqrt, __sqrtx, sqrtq, + __fmodh, fmodf, fmod, __fmodx, fmodq, + __fmah, fmaf, fma, __fmax, fmaq, + + // float comparison + __eqhf2, __eqsf2, __eqdf2, __eqxf2, __eqtf2, // == iff return == 0 + __nehf2, __nesf2, __nedf2, __nexf2, __netf2, // != iff return != 0 + __lthf2, __ltsf2, __ltdf2, __ltxf2, __lttf2, // < iff return < 0 + __lehf2, __lesf2, __ledf2, __lexf2, __letf2, // <= iff return <= 0 + __gthf2, __gtsf2, __gtdf2, __gtxf2, __gttf2, // > iff return > 0 + __gehf2, __gesf2, __gedf2, __gexf2, __getf2, // >= iff return >= 0 + + // AEABI float comparison. 
On ARM, the `sf`/`df` functions above are not available, + // and these must be used instead. They are not just aliases for the above functions + // because they have a different (better) ABI. + __aeabi_fcmpeq, __aeabi_dcmpeq, // ==, returns bool + __aeabi_fcmplt, __aeabi_dcmplt, // <, returns bool + __aeabi_fcmple, __aeabi_dcmple, // <=, returns bool + __aeabi_fcmpgt, __aeabi_dcmpgt, // >, returns bool + __aeabi_fcmpge, __aeabi_dcmpge, // >=, returns bool + + // float shortening + // to f16 // to f32 // to f64 // to f80 + __trunctfhf2, __trunctfsf2, __trunctfdf2, __trunctfxf2, // from f128 + __truncxfhf2, __truncxfsf2, __truncxfdf2, // from f80 + __truncdfhf2, __truncdfsf2, // from f64 + __truncsfhf2, // from f32 + + // float widening + // to f128 // to f80 // to f64 // to f32 + __extendhftf2, __extendhfxf2, __extendhfdf2, __extendhfsf2, // from f16 + __extendsftf2, __extendsfxf2, __extendsfdf2, // from f32 + __extenddftf2, __extenddfxf2, // from f64 + __extendxftf2, // from f80 + + // int to float + __floatsihf, __floatsisf, __floatsidf, __floatsixf, __floatsitf, // i32 to float + __floatdihf, __floatdisf, __floatdidf, __floatdixf, __floatditf, // i64 to float + __floattihf, __floattisf, __floattidf, __floattixf, __floattitf, // i128 to float + __floateihf, __floateisf, __floateidf, __floateixf, __floateitf, // arbitrary iN to float + __floatunsihf, __floatunsisf, __floatunsidf, __floatunsixf, __floatunsitf, // u32 to float + __floatundihf, __floatundisf, __floatundidf, __floatundixf, __floatunditf, // u64 to float + __floatuntihf, __floatuntisf, __floatuntidf, __floatuntixf, __floatuntitf, // u128 to float + __floatuneihf, __floatuneisf, __floatuneidf, __floatuneixf, __floatuneitf, // arbitrary uN to float + + // float to int + __fixhfsi, __fixsfsi, __fixdfsi, __fixxfsi, __fixtfsi, // float to i32 + __fixhfdi, __fixsfdi, __fixdfdi, __fixxfdi, __fixtfdi, // float to i64 + __fixhfti, __fixsfti, __fixdfti, __fixxfti, __fixtfti, // float to i128 + __fixhfei, __fixsfei, __fixdfei, __fixxfei, __fixtfei, // float to arbitrary iN + __fixunshfsi, __fixunssfsi, __fixunsdfsi, __fixunsxfsi, __fixunstfsi, // float to u32 + __fixunshfdi, __fixunssfdi, __fixunsdfdi, __fixunsxfdi, __fixunstfdi, // float to u64 + __fixunshfti, __fixunssfti, __fixunsdfti, __fixunsxfti, __fixunstfti, // float to u128 + __fixunshfei, __fixunssfei, __fixunsdfei, __fixunsxfei, __fixunstfei, // float to arbitrary uN + + // zig fmt: on + + /// Usually, the tag names of `CompilerRtFunc` match the corresponding symbol name, but not + /// always; some target triples have slightly different compiler-rt ABIs for one reason or + /// another. + pub fn name(f: CompilerRtFunc, target: *const std.Target) []const u8 { + const use_gnu_f16_abi = switch (target.cpu.arch) { + .wasm32, + .wasm64, + .riscv64, + .riscv64be, + .riscv32, + .riscv32be, + => false, + .x86, .x86_64 => true, + .arm, .armeb, .thumb, .thumbeb => switch (target.abi) { + .eabi, .eabihf => false, + else => true, + }, + else => !target.os.tag.isDarwin(), + }; + const use_aeabi = target.cpu.arch.isArm() and switch (target.abi) { + .eabi, + .eabihf, + .musleabi, + .musleabihf, + .gnueabi, + .gnueabihf, + .android, + .androideabi, + => true, + else => false, + }; + + // GNU didn't like the standard names specifically for conversions between f16 + // and f32, so they decided to make their own naming convention with blackjack and + // hookers (but only use it on a few random targets of course). This overrides + // the ARM EABI in some cases. I don't like GNU.
+ if (use_gnu_f16_abi) switch (f) { + .__truncsfhf2 => return "__gnu_f2h_ieee", + .__extendhfsf2 => return "__gnu_h2f_ieee", + else => {}, + }; + + if (use_aeabi) return switch (f) { + .__addsf3 => "__aeabi_fadd", + .__adddf3 => "__aeabi_dadd", + .__subsf3 => "__aeabi_fsub", + .__subdf3 => "__aeabi_dsub", + .__mulsf3 => "__aeabi_fmul", + .__muldf3 => "__aeabi_dmul", + .__divsf3 => "__aeabi_fdiv", + .__divdf3 => "__aeabi_ddiv", + .__truncdfhf2 => "__aeabi_d2h", + .__truncdfsf2 => "__aeabi_d2f", + .__truncsfhf2 => "__aeabi_f2h", + .__extendsfdf2 => "__aeabi_f2d", + .__extendhfsf2 => "__aeabi_h2f", + .__floatsisf => "__aeabi_i2f", + .__floatsidf => "__aeabi_i2d", + .__floatdisf => "__aeabi_l2f", + .__floatdidf => "__aeabi_l2d", + .__floatunsisf => "__aeabi_ui2f", + .__floatunsidf => "__aeabi_ui2d", + .__floatundisf => "__aeabi_ul2f", + .__floatundidf => "__aeabi_ul2d", + .__fixsfsi => "__aeabi_f2iz", + .__fixdfsi => "__aeabi_d2iz", + .__fixsfdi => "__aeabi_f2lz", + .__fixdfdi => "__aeabi_d2lz", + .__fixunssfsi => "__aeabi_f2uiz", + .__fixunsdfsi => "__aeabi_d2uiz", + .__fixunssfdi => "__aeabi_f2ulz", + .__fixunsdfdi => "__aeabi_d2ulz", + + // These functions are not available on AEABI. The AEABI equivalents are + // separate fields rather than aliases because they have a different ABI. + .__eqsf2, .__eqdf2 => unreachable, + .__nesf2, .__nedf2 => unreachable, + .__ltsf2, .__ltdf2 => unreachable, + .__lesf2, .__ledf2 => unreachable, + .__gtsf2, .__gtdf2 => unreachable, + .__gesf2, .__gedf2 => unreachable, + + else => @tagName(f), + }; + + return switch (f) { + // These functions are only available on AEABI. + .__aeabi_fcmpeq, .__aeabi_dcmpeq => unreachable, + .__aeabi_fcmplt, .__aeabi_dcmplt => unreachable, + .__aeabi_fcmple, .__aeabi_dcmple => unreachable, + .__aeabi_fcmpgt, .__aeabi_dcmpgt => unreachable, + .__aeabi_fcmpge, .__aeabi_dcmpge => unreachable, + + else => @tagName(f), + }; + } + + pub fn @"callconv"(f: CompilerRtFunc, target: *const std.Target) std.builtin.CallingConvention { + const use_gnu_f16_abi = switch (target.cpu.arch) { + .wasm32, + .wasm64, + .riscv64, + .riscv64be, + .riscv32, + .riscv32be, + => false, + .x86, .x86_64 => true, + .arm, .armeb, .thumb, .thumbeb => switch (target.abi) { + .eabi, .eabihf => false, + else => true, + }, + else => !target.os.tag.isDarwin(), + }; + const use_aeabi = target.cpu.arch.isArm() and switch (target.abi) { + .eabi, + .eabihf, + .musleabi, + .musleabihf, + .gnueabi, + .gnueabihf, + .android, + .androideabi, + => true, + else => false, + }; + + if (use_gnu_f16_abi) switch (f) { + .__truncsfhf2, + .__extendhfsf2, + => return target.cCallingConvention().?, + else => {}, + }; + + if (use_aeabi) switch (f) { + // zig fmt: off + .__addsf3, .__adddf3, .__subsf3, .__subdf3, + .__mulsf3, .__muldf3, .__divsf3, .__divdf3, + .__truncdfhf2, .__truncdfsf2, .__truncsfhf2, + .__extendsfdf2, .__extendhfsf2, + .__floatsisf, .__floatsidf, .__floatdisf, .__floatdidf, + .__floatunsisf, .__floatunsidf, .__floatundisf, .__floatundidf, + .__fixsfsi, .__fixdfsi, .__fixsfdi, .__fixdfdi, + .__fixunssfsi, .__fixunsdfsi, .__fixunssfdi, .__fixunsdfdi, + => return .{ .arm_aapcs = .{} }, + // zig fmt: on + else => {}, + }; + + return target.cCallingConvention().?; + } + + pub fn returnType(f: CompilerRtFunc) Type { + return switch (f) { + .__addhf3, .__subhf3, .__mulhf3, .__divhf3 => .f16, + .__addsf3, .__subsf3, .__mulsf3, .__divsf3 => .f32, + .__adddf3, .__subdf3, .__muldf3, .__divdf3 => .f64, + .__addxf3, .__subxf3, .__mulxf3, .__divxf3 => .f80, + .__addtf3, 
.__subtf3, .__multf3, .__divtf3 => .f128, + + // zig fmt: off + .__fminh, .__fmaxh, + .__ceilh, .__floorh, .__trunch, .__roundh, + .__logh, .__log2h, .__log10h, + .__exph, .__exp2h, + .__sinh, .__cosh, .__tanh, + .__fabsh, .__sqrth, .__fmodh, .__fmah, + => .f16, + .fminf, .fmaxf, + .ceilf, .floorf, .truncf, .roundf, + .logf, .log2f, .log10f, + .expf, .exp2f, + .sinf, .cosf, .tanf, + .fabsf, .sqrtf, .fmodf, .fmaf, + => .f32, + .fmin, .fmax, + .ceil, .floor, .trunc, .round, + .log, .log2, .log10, + .exp, .exp2, + .sin, .cos, .tan, + .fabs, .sqrt, .fmod, .fma, + => .f64, + .__fminx, .__fmaxx, + .__ceilx, .__floorx, .__truncx, .__roundx, + .__logx, .__log2x, .__log10x, + .__expx, .__exp2x, + .__sinx, .__cosx, .__tanx, + .__fabsx, .__sqrtx, .__fmodx, .__fmax, + => .f80, + .fminq, .fmaxq, + .ceilq, .floorq, .truncq, .roundq, + .logq, .log2q, .log10q, + .expq, .exp2q, + .sinq, .cosq, .tanq, + .fabsq, .sqrtq, .fmodq, .fmaq, + => .f128, + // zig fmt: on + + .__eqhf2, .__eqsf2, .__eqdf2, .__eqxf2, .__eqtf2 => .i32, + .__nehf2, .__nesf2, .__nedf2, .__nexf2, .__netf2 => .i32, + .__lthf2, .__ltsf2, .__ltdf2, .__ltxf2, .__lttf2 => .i32, + .__lehf2, .__lesf2, .__ledf2, .__lexf2, .__letf2 => .i32, + .__gthf2, .__gtsf2, .__gtdf2, .__gtxf2, .__gttf2 => .i32, + .__gehf2, .__gesf2, .__gedf2, .__gexf2, .__getf2 => .i32, + + .__aeabi_fcmpeq, .__aeabi_dcmpeq => .i32, + .__aeabi_fcmplt, .__aeabi_dcmplt => .i32, + .__aeabi_fcmple, .__aeabi_dcmple => .i32, + .__aeabi_fcmpgt, .__aeabi_dcmpgt => .i32, + .__aeabi_fcmpge, .__aeabi_dcmpge => .i32, + + .__trunctfhf2, .__truncxfhf2, .__truncdfhf2, .__truncsfhf2 => .f16, + .__trunctfsf2, .__truncxfsf2, .__truncdfsf2 => .f32, + .__trunctfdf2, .__truncxfdf2 => .f64, + .__trunctfxf2 => .f80, + + .__extendhftf2, .__extendsftf2, .__extenddftf2, .__extendxftf2 => .f128, + .__extendhfxf2, .__extendsfxf2, .__extenddfxf2 => .f80, + .__extendhfdf2, .__extendsfdf2 => .f64, + .__extendhfsf2 => .f32, + + .__floatsihf, .__floatdihf, .__floattihf, .__floateihf => .f16, + .__floatsisf, .__floatdisf, .__floattisf, .__floateisf => .f32, + .__floatsidf, .__floatdidf, .__floattidf, .__floateidf => .f64, + .__floatsixf, .__floatdixf, .__floattixf, .__floateixf => .f80, + .__floatsitf, .__floatditf, .__floattitf, .__floateitf => .f128, + .__floatunsihf, .__floatundihf, .__floatuntihf, .__floatuneihf => .f16, + .__floatunsisf, .__floatundisf, .__floatuntisf, .__floatuneisf => .f32, + .__floatunsidf, .__floatundidf, .__floatuntidf, .__floatuneidf => .f64, + .__floatunsixf, .__floatundixf, .__floatuntixf, .__floatuneixf => .f80, + .__floatunsitf, .__floatunditf, .__floatuntitf, .__floatuneitf => .f128, + + .__fixhfsi, .__fixsfsi, .__fixdfsi, .__fixxfsi, .__fixtfsi => .i32, + .__fixhfdi, .__fixsfdi, .__fixdfdi, .__fixxfdi, .__fixtfdi => .i64, + .__fixhfti, .__fixsfti, .__fixdfti, .__fixxfti, .__fixtfti => .i128, + .__fixhfei, .__fixsfei, .__fixdfei, .__fixxfei, .__fixtfei => .void, + .__fixunshfsi, .__fixunssfsi, .__fixunsdfsi, .__fixunsxfsi, .__fixunstfsi => .u32, + .__fixunshfdi, .__fixunssfdi, .__fixunsdfdi, .__fixunsxfdi, .__fixunstfdi => .u64, + .__fixunshfti, .__fixunssfti, .__fixunsdfti, .__fixunsxfti, .__fixunstfti => .u128, + .__fixunshfei, .__fixunssfei, .__fixunsdfei, .__fixunsxfei, .__fixunstfei => .void, + }; + } +}; diff --git a/src/Air/Legalize.zig b/src/Air/Legalize.zig index 1d935bd360..7657bdd479 100644 --- a/src/Air/Legalize.zig +++ b/src/Air/Legalize.zig @@ -115,6 +115,8 @@ pub const Feature = enum { scalarize_int_from_float_safe, scalarize_int_from_float_optimized_safe, 
scalarize_float_from_int, + scalarize_reduce, + scalarize_reduce_optimized, scalarize_shuffle_one, scalarize_shuffle_two, scalarize_select, @@ -159,6 +161,27 @@ pub const Feature = enum { /// Replace `aggregate_init` of a packed struct with a sequence of `shl_exact`, `bitcast`, `intcast`, and `bit_or`. expand_packed_aggregate_init, + /// Replace all arithmetic operations on 16-bit floating-point types with calls to soft-float + /// routines in compiler_rt, including `fptrunc`/`fpext`/`float_from_int`/`int_from_float` + /// where the operand or target type is a 16-bit floating-point type. This feature implies: + /// + /// * scalarization of 16-bit float vector operations + /// * expansion of safety-checked 16-bit float operations + /// + /// If this feature is enabled, the following AIR instruction tags may be emitted: + /// * `.legalize_vec_elem_val` + /// * `.legalize_vec_store_elem` + /// * `.legalize_compiler_rt_call` + soft_f16, + /// Like `soft_f16`, but for 32-bit floating-point types. + soft_f32, + /// Like `soft_f16`, but for 64-bit floating-point types. + soft_f64, + /// Like `soft_f16`, but for 80-bit floating-point types. + soft_f80, + /// Like `soft_f16`, but for 128-bit floating-point types. + soft_f128, + fn scalarize(tag: Air.Inst.Tag) Feature { return switch (tag) { else => unreachable, @@ -238,6 +261,8 @@ pub const Feature = enum { .int_from_float_safe => .scalarize_int_from_float_safe, .int_from_float_optimized_safe => .scalarize_int_from_float_optimized_safe, .float_from_int => .scalarize_float_from_int, + .reduce => .scalarize_reduce, + .reduce_optimized => .scalarize_reduce_optimized, .shuffle_one => .scalarize_shuffle_one, .shuffle_two => .scalarize_shuffle_two, .select => .scalarize_select, @@ -283,6 +308,10 @@ fn extraData(l: *const Legalize, comptime T: type, index: usize) @TypeOf(Air.ext } fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { + // In zig1, this function needs a lot of eval branch quota, because all of the inlined feature + // checks are comptime-evaluated (to ensure unused features are not included in the binary). 
+ @setEvalBranchQuota(4000); + const zcu = l.pt.zcu; const ip = &zcu.intern_pool; for (0..body_len) |body_index| { @@ -291,30 +320,67 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .arg => {}, inline .add, .add_optimized, - .add_wrap, - .add_sat, .sub, .sub_optimized, - .sub_wrap, - .sub_sat, .mul, .mul_optimized, - .mul_wrap, - .mul_sat, .div_float, .div_float_optimized, - .div_trunc, - .div_trunc_optimized, - .div_floor, - .div_floor_optimized, .div_exact, .div_exact_optimized, .rem, .rem_optimized, - .mod, - .mod_optimized, - .max, .min, + .max, + => |air_tag| { + const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; + const ty = l.typeOf(bin_op.lhs); + switch (l.wantScalarizeOrSoftFloat(air_tag, ty)) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)), + .soft_float => continue :inst try l.compilerRtCall( + inst, + softFloatFunc(air_tag, ty, zcu), + &.{ bin_op.lhs, bin_op.rhs }, + l.typeOf(bin_op.lhs), + ), + } + }, + inline .div_trunc, + .div_trunc_optimized, + .div_floor, + .div_floor_optimized, + => |air_tag| { + const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; + switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(bin_op.lhs))) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)), + .soft_float => continue :inst l.replaceInst(inst, .block, try l.softFloatDivTruncFloorBlockPayload( + inst, + bin_op.lhs, + bin_op.rhs, + air_tag, + )), + } + }, + inline .mod, .mod_optimized => |air_tag| { + const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; + switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(bin_op.lhs))) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)), + .soft_float => continue :inst l.replaceInst(inst, .block, try l.softFloatModBlockPayload( + inst, + bin_op.lhs, + bin_op.rhs, + )), + } + }, + inline .add_wrap, + .add_sat, + .sub_wrap, + .sub_sat, + .mul_wrap, + .mul_sat, .bit_and, .bit_or, .xor, @@ -408,20 +474,80 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .popcount, .byte_swap, .bit_reverse, - .abs, - .fptrunc, - .fpext, .intcast, .trunc, - .int_from_float, - .int_from_float_optimized, - .float_from_int, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; if (ty_op.ty.toType().isVector(zcu)) { continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); } }, + .abs => { + const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; + switch (l.wantScalarizeOrSoftFloat(.abs, ty_op.ty.toType())) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)), + .soft_float => continue :inst try l.compilerRtCall( + inst, + softFloatFunc(.abs, ty_op.ty.toType(), zcu), + &.{ty_op.operand}, + ty_op.ty.toType(), + ), + } + }, + .fptrunc => { + const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; + const src_ty = l.typeOf(ty_op.operand); + const dest_ty = ty_op.ty.toType(); + if (src_ty.zigTypeTag(zcu) == .vector) { + if (l.features.has(.scalarize_fptrunc) or + l.wantSoftFloatScalar(src_ty.childType(zcu)) or + l.wantSoftFloatScalar(dest_ty.childType(zcu))) + { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, 
.ty_op)); + } + } else if (l.wantSoftFloatScalar(src_ty) or l.wantSoftFloatScalar(dest_ty)) { + continue :inst try l.compilerRtCall(inst, l.softFptruncFunc(src_ty, dest_ty), &.{ty_op.operand}, dest_ty); + } + }, + .fpext => { + const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; + const src_ty = l.typeOf(ty_op.operand); + const dest_ty = ty_op.ty.toType(); + if (src_ty.zigTypeTag(zcu) == .vector) { + if (l.features.has(.scalarize_fpext) or + l.wantSoftFloatScalar(src_ty.childType(zcu)) or + l.wantSoftFloatScalar(dest_ty.childType(zcu))) + { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } + } else if (l.wantSoftFloatScalar(src_ty) or l.wantSoftFloatScalar(dest_ty)) { + continue :inst try l.compilerRtCall(inst, l.softFpextFunc(src_ty, dest_ty), &.{ty_op.operand}, dest_ty); + } + }, + inline .int_from_float, .int_from_float_optimized => |air_tag| { + const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; + switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(ty_op.operand))) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)), + .soft_float => switch (try l.softIntFromFloat(inst)) { + .call => |func| continue :inst try l.compilerRtCall(inst, func, &.{ty_op.operand}, ty_op.ty.toType()), + .block_payload => |data| continue :inst l.replaceInst(inst, .block, data), + }, + } + }, + .float_from_int => { + const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; + const dest_ty = ty_op.ty.toType(); + switch (l.wantScalarizeOrSoftFloat(.float_from_int, dest_ty)) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)), + .soft_float => switch (try l.softFloatFromInt(inst)) { + .call => |func| continue :inst try l.compilerRtCall(inst, func, &.{ty_op.operand}, dest_ty), + .block_payload => |data| continue :inst l.replaceInst(inst, .block, data), + }, + } + }, .bitcast => if (l.features.has(.scalarize_bitcast)) { if (try l.scalarizeBitcastBlockPayload(inst)) |payload| { continue :inst l.replaceInst(inst, .block, payload); @@ -436,22 +562,25 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); } }, - .int_from_float_safe => if (l.features.has(.expand_int_from_float_safe)) { - assert(!l.features.has(.scalarize_int_from_float_safe)); - continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, false)); - } else if (l.features.has(.scalarize_int_from_float_safe)) { - const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + inline .int_from_float_safe, + .int_from_float_optimized_safe, + => |air_tag| { + const optimized = air_tag == .int_from_float_optimized_safe; + const expand_feature = switch (air_tag) { + .int_from_float_safe => .expand_int_from_float_safe, + .int_from_float_optimized_safe => .expand_int_from_float_optimized_safe, + else => unreachable, + }; + if (l.features.has(expand_feature)) { + assert(!l.features.has(.scalarize(air_tag))); + continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, optimized)); } - }, - .int_from_float_optimized_safe => if (l.features.has(.expand_int_from_float_optimized_safe)) { - 
assert(!l.features.has(.scalarize_int_from_float_optimized_safe)); - continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, true)); - } else if (l.features.has(.scalarize_int_from_float_optimized_safe)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(ty_op.operand))) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)), + // Expand the safety check so that soft-float can rewrite the unchecked operation. + .soft_float => continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, optimized)), } }, .block, .loop => { @@ -483,12 +612,26 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .ceil, .round, .trunc_float, - .neg, - .neg_optimized, - => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { - const un_op = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op; - if (l.typeOf(un_op).isVector(zcu)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op)); + => |air_tag| { + const operand = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op; + const ty = l.typeOf(operand); + switch (l.wantScalarizeOrSoftFloat(air_tag, ty)) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op)), + .soft_float => continue :inst try l.compilerRtCall( + inst, + softFloatFunc(air_tag, ty, zcu), + &.{operand}, + l.typeOf(operand), + ), + } + }, + inline .neg, .neg_optimized => |air_tag| { + const operand = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op; + switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(operand))) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op)), + .soft_float => continue :inst l.replaceInst(inst, .block, try l.softFloatNegBlockPayload(inst, operand)), } }, .cmp_lt, @@ -503,11 +646,24 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .cmp_gt_optimized, .cmp_neq, .cmp_neq_optimized, - => {}, - inline .cmp_vector, .cmp_vector_optimized => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { + => |air_tag| { + const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; + const ty = l.typeOf(bin_op.lhs); + if (l.wantSoftFloatScalar(ty)) { + continue :inst l.replaceInst( + inst, + .block, + try l.softFloatCmpBlockPayload(inst, ty, air_tag.toCmpOp().?, bin_op.lhs, bin_op.rhs), + ); + } + }, + inline .cmp_vector, .cmp_vector_optimized => |air_tag| { const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; - if (ty_pl.ty.toType().isVector(zcu)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .cmp_vector)); + const payload = l.extraData(Air.VectorCmp, ty_pl.payload).data; + switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(payload.lhs))) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .cmp_vector)), + .soft_float => unreachable, // the operand is not a scalar } }, .cond_br => { @@ -615,16 +771,27 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .ptr_elem_ptr, .array_to_slice, => {}, - .reduce, .reduce_optimized => if 
(l.features.has(.reduce_one_elem_to_bitcast)) { + inline .reduce, .reduce_optimized => |air_tag| { const reduce = l.air_instructions.items(.data)[@intFromEnum(inst)].reduce; const vector_ty = l.typeOf(reduce.operand); - switch (vector_ty.vectorLen(zcu)) { - 0 => unreachable, - 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{ - .ty = .fromType(vector_ty.childType(zcu)), - .operand = reduce.operand, - } }), - else => {}, + if (l.features.has(.reduce_one_elem_to_bitcast)) { + switch (vector_ty.vectorLen(zcu)) { + 0 => unreachable, + 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{ + .ty = .fromType(vector_ty.childType(zcu)), + .operand = reduce.operand, + } }), + else => {}, + } + } + switch (l.wantScalarizeOrSoftFloat(air_tag, vector_ty)) { + .none => {}, + .scalarize => continue :inst l.replaceInst( + inst, + .block, + try l.scalarizeReduceBlockPayload(inst, air_tag == .reduce_optimized), + ), + .soft_float => unreachable, // the operand is not a scalar } }, .splat => if (l.features.has(.splat_one_elem_to_bitcast)) { @@ -638,14 +805,30 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { else => {}, } }, - .shuffle_one => if (l.features.has(.scalarize_shuffle_one)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleOneBlockPayload(inst)); + .shuffle_one => { + const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; + switch (l.wantScalarizeOrSoftFloat(.shuffle_one, ty_pl.ty.toType())) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleOneBlockPayload(inst)), + .soft_float => unreachable, // the operand is not a scalar + } }, - .shuffle_two => if (l.features.has(.scalarize_shuffle_two)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleTwoBlockPayload(inst)); + .shuffle_two => { + const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; + switch (l.wantScalarizeOrSoftFloat(.shuffle_two, ty_pl.ty.toType())) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleTwoBlockPayload(inst)), + .soft_float => unreachable, // the operand is not a scalar + } }, - .select => if (l.features.has(.scalarize_select)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .select)); + .select => { + const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; + const bin = l.extraData(Air.Bin, pl_op.payload).data; + switch (l.wantScalarizeOrSoftFloat(.select, l.typeOf(bin.lhs))) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .select)), + .soft_float => unreachable, // the operand is not a scalar + } }, .memset, .memset_safe, @@ -685,10 +868,17 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { } }, .union_init, .prefetch => {}, - .mul_add => if (l.features.has(.scalarize_mul_add)) { + .mul_add => { const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; - if (l.typeOf(pl_op.operand).isVector(zcu)) { - continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .pl_op_bin)); + const ty = l.typeOf(pl_op.operand); + switch (l.wantScalarizeOrSoftFloat(.mul_add, ty)) { + .none => {}, + .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .pl_op_bin)), + .soft_float => { + const bin = l.extraData(Air.Bin, pl_op.payload).data; + const func = softFloatFunc(.mul_add, ty, zcu); + continue :inst try 
l.compilerRtCall(inst, func, &.{ bin.lhs, bin.rhs, pl_op.operand }, ty); + }, } }, .field_parent_ptr, @@ -709,6 +899,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .work_group_id, .legalize_vec_elem_val, .legalize_vec_store_elem, + .legalize_compiler_rt_call, => {}, } } @@ -1606,6 +1797,128 @@ fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error! .payload = try l.addBlockBody(main_block.body()), } }; } +fn scalarizeReduceBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, optimized: bool) Error!Air.Inst.Data { + const pt = l.pt; + const zcu = pt.zcu; + + const reduce = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].reduce; + + const vector_ty = l.typeOf(reduce.operand); + const scalar_ty = vector_ty.childType(zcu); + + const ident_val: Value = switch (reduce.operation) { + // identity for add is 0; identity for OR and XOR is all 0 bits + .Or, .Xor, .Add => switch (scalar_ty.zigTypeTag(zcu)) { + .int => try pt.intValue(scalar_ty, 0), + .float => try pt.floatValue(scalar_ty, 0.0), + else => unreachable, + }, + // identity for multiplication is 1 + .Mul => switch (scalar_ty.zigTypeTag(zcu)) { + .int => try pt.intValue(scalar_ty, 1), + .float => try pt.floatValue(scalar_ty, 1.0), + else => unreachable, + }, + // identity for AND is all 1 bits + .And => switch (scalar_ty.intInfo(zcu).signedness) { + .unsigned => try scalar_ty.maxIntScalar(pt, scalar_ty), + .signed => try pt.intValue(scalar_ty, -1), + }, + // identity for @min is maximum value + .Min => switch (scalar_ty.zigTypeTag(zcu)) { + .int => try scalar_ty.maxIntScalar(pt, scalar_ty), + .float => try pt.floatValue(scalar_ty, std.math.inf(f32)), + else => unreachable, + }, + // identity for @max is minimum value + .Max => switch (scalar_ty.zigTypeTag(zcu)) { + .int => try scalar_ty.minIntScalar(pt, scalar_ty), + .float => try pt.floatValue(scalar_ty, -std.math.inf(f32)), + else => unreachable, + }, + }; + + const op_tag: Air.Inst.Tag = switch (reduce.operation) { + .Or => .bit_or, + .And => .bit_and, + .Xor => .xor, + .Min => .min, + .Max => .max, + .Add => switch (scalar_ty.zigTypeTag(zcu)) { + .int => .add_wrap, + .float => if (optimized) .add_optimized else .add, + else => unreachable, + }, + .Mul => switch (scalar_ty.zigTypeTag(zcu)) { + .int => .mul_wrap, + .float => if (optimized) .mul_optimized else .mul, + else => unreachable, + }, + }; + + // %1 = block(Scalar, { + // %2 = alloc(*usize) + // %3 = alloc(*Scalar) + // %4 = store(%2, @zero_usize) + // %5 = store(%3, ident_val) // or whatever the identity is for this operator + // %6 = loop({ + // %7 = load(%2) + // %8 = legalize_vec_elem_val(orig_operand, %7) + // %9 = load(%3) + // %10 = add(%8, %9) // or whatever the operator is + // %11 = cmp_eq(%7, vec_len - 1) + // %12 = cond_br(%11, { + // %13 = br(%1, %10) + // }, { + // %14 = store(%3, %10) + // %15 = add(%7, @one_usize) + // %16 = store(%2, %15) + // %17 = repeat(%6) + // }) + // }) + // }) + + var inst_buf: [16]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + + const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef(); + const accum_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(scalar_ty)).toRef(); + _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize); + _ = main_block.addBinOp(l, .store, accum_ptr, .fromValue(ident_val)); + + var loop: Loop = .init(l, &main_block); + loop.block = .init(main_block.stealRemainingCapacity()); + + const index_val =
loop.block.addTyOp(l, .load, .usize, index_ptr).toRef(); + const elem_val = loop.block.addBinOp(l, .legalize_vec_elem_val, reduce.operand, index_val).toRef(); + const old_accum = loop.block.addTyOp(l, .load, scalar_ty, accum_ptr).toRef(); + const new_accum = loop.block.addBinOp(l, op_tag, old_accum, elem_val).toRef(); + + const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, vector_ty.vectorLen(zcu) - 1))).toRef(); + + var condbr: CondBr = .init(l, is_end_val, &loop.block, .{}); + + condbr.then_block = .init(loop.block.stealRemainingCapacity()); + condbr.then_block.addBr(l, orig_inst, new_accum); + + condbr.else_block = .init(condbr.then_block.stealRemainingCapacity()); + _ = condbr.else_block.addBinOp(l, .store, accum_ptr, new_accum); + const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef(); + _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val); + _ = condbr.else_block.add(l, .{ + .tag = .repeat, + .data = .{ .repeat = .{ .loop_inst = loop.inst } }, + }); + + try condbr.finish(l); + try loop.finish(l); + + return .{ .ty_pl = .{ + .ty = .fromType(scalar_ty), + .payload = try l.addBlockBody(main_block.body()), + } }; +} fn safeIntcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { const pt = l.pt; @@ -2298,6 +2611,22 @@ const Block = struct { }); } + fn addCompilerRtCall(b: *Block, l: *Legalize, func: Air.CompilerRtFunc, args: []const Air.Inst.Ref) Error!Air.Inst.Index { + return b.add(l, .{ + .tag = .legalize_compiler_rt_call, + .data = .{ .legalize_compiler_rt_call = .{ + .func = func, + .payload = payload: { + const extra_len = @typeInfo(Air.Call).@"struct".fields.len + args.len; + try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, extra_len); + const index = l.addExtra(Air.Call, .{ .args_len = @intCast(args.len) }) catch unreachable; + l.air_extra.appendSliceAssumeCapacity(@ptrCast(args)); + break :payload index; + }, + } }, + }); + } + /// Adds the code to call the panic handler `panic_id`. This is usually `.call` then `.unreach`, /// but if `Zcu.Feature.panic_fn` is unsupported, we lower to `.trap` instead. fn addPanic(b: *Block, l: *Legalize, panic_id: Zcu.SimplePanicId) Error!void { @@ -2365,14 +2694,7 @@ const Block = struct { optimized: bool, ) Air.Inst.Index { return b.add(l, .{ - .tag = switch (op) { - .lt => if (optimized) .cmp_lt_optimized else .cmp_lt, - .lte => if (optimized) .cmp_lte_optimized else .cmp_lte, - .eq => if (optimized) .cmp_eq_optimized else .cmp_eq, - .gte => if (optimized) .cmp_gte_optimized else .cmp_gte, - .gt => if (optimized) .cmp_gt_optimized else .cmp_gt, - .neq => if (optimized) .cmp_neq_optimized else .cmp_neq, - }, + .tag = .fromCmpOp(op, optimized), .data = .{ .bin_op = .{ .lhs = lhs, .rhs = rhs, @@ -2399,6 +2721,82 @@ const Block = struct { return operand; } + /// This function emits *two* instructions. 
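+    /// The first is a `legalize_compiler_rt_call` to the selected comparison routine; the second + /// compares that call's `i32` result against zero to produce the final `bool` result.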
+ fn addSoftFloatCmp( + b: *Block, + l: *Legalize, + float_ty: Type, + op: std.math.CompareOperator, + lhs: Air.Inst.Ref, + rhs: Air.Inst.Ref, + ) Error!Air.Inst.Ref { + const pt = l.pt; + const target = pt.zcu.getTarget(); + const use_aeabi = target.cpu.arch.isArm() and switch (target.abi) { + .eabi, + .eabihf, + .musleabi, + .musleabihf, + .gnueabi, + .gnueabihf, + .android, + .androideabi, + => true, + else => false, + }; + const func: Air.CompilerRtFunc, const ret_cmp_op: std.math.CompareOperator = switch (float_ty.floatBits(target)) { + // zig fmt: off + 16 => switch (op) { + .eq => .{ .__eqhf2, .eq }, + .neq => .{ .__nehf2, .neq }, + .lt => .{ .__lthf2, .lt }, + .lte => .{ .__lehf2, .lte }, + .gt => .{ .__gthf2, .gt }, + .gte => .{ .__gehf2, .gte }, + }, + 32 => switch (op) { + .eq => if (use_aeabi) .{ .__aeabi_fcmpeq, .neq } else .{ .__eqsf2, .eq }, + .neq => if (use_aeabi) .{ .__aeabi_fcmpeq, .eq } else .{ .__nesf2, .neq }, + .lt => if (use_aeabi) .{ .__aeabi_fcmplt, .neq } else .{ .__ltsf2, .lt }, + .lte => if (use_aeabi) .{ .__aeabi_fcmple, .neq } else .{ .__lesf2, .lte }, + .gt => if (use_aeabi) .{ .__aeabi_fcmpgt, .neq } else .{ .__gtsf2, .gt }, + .gte => if (use_aeabi) .{ .__aeabi_fcmpge, .neq } else .{ .__gesf2, .gte }, + }, + 64 => switch (op) { + .eq => if (use_aeabi) .{ .__aeabi_dcmpeq, .neq } else .{ .__eqdf2, .eq }, + .neq => if (use_aeabi) .{ .__aeabi_dcmpeq, .eq } else .{ .__nedf2, .neq }, + .lt => if (use_aeabi) .{ .__aeabi_dcmplt, .neq } else .{ .__ltdf2, .lt }, + .lte => if (use_aeabi) .{ .__aeabi_dcmple, .neq } else .{ .__ledf2, .lte }, + .gt => if (use_aeabi) .{ .__aeabi_dcmpgt, .neq } else .{ .__gtdf2, .gt }, + .gte => if (use_aeabi) .{ .__aeabi_dcmpge, .neq } else .{ .__gedf2, .gte }, + }, + 80 => switch (op) { + .eq => .{ .__eqxf2, .eq }, + .neq => .{ .__nexf2, .neq }, + .lt => .{ .__ltxf2, .lt }, + .lte => .{ .__lexf2, .lte }, + .gt => .{ .__gtxf2, .gt }, + .gte => .{ .__gexf2, .gte }, + }, + 128 => switch (op) { + .eq => .{ .__eqtf2, .eq }, + .neq => .{ .__netf2, .neq }, + .lt => .{ .__lttf2, .lt }, + .lte => .{ .__letf2, .lte }, + .gt => .{ .__gttf2, .gt }, + .gte => .{ .__getf2, .gte }, + }, + else => unreachable, + // zig fmt: on + }; + const call_inst = try b.addCompilerRtCall(l, func, &.{ lhs, rhs }); + const raw_result = call_inst.toRef(); + assert(l.typeOf(raw_result).toIntern() == .i32_type); + const zero_i32: Air.Inst.Ref = .fromValue(try pt.intValue(.i32, 0)); + const ret_cmp_tag: Air.Inst.Tag = .fromCmpOp(ret_cmp_op, false); + return b.addBinOp(l, ret_cmp_tag, raw_result, zero_i32).toRef(); + } + /// Returns the unused capacity of `b.instructions`, and shrinks `b.instructions` down to `b.len`. /// This is useful when you've provided a buffer big enough for all your instructions, but you are /// now starting a new block and some of them need to live there instead. 
@@ -2525,6 +2923,484 @@ inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, comptime tag: Air.Inst return tag; } +fn compilerRtCall( + l: *Legalize, + orig_inst: Air.Inst.Index, + func: Air.CompilerRtFunc, + args: []const Air.Inst.Ref, + result_ty: Type, +) Error!Air.Inst.Tag { + const zcu = l.pt.zcu; + const gpa = zcu.gpa; + + const func_ret_ty = func.returnType(); + + if (func_ret_ty.toIntern() == result_ty.toIntern()) { + try l.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Call).@"struct".fields.len + args.len); + const payload = l.addExtra(Air.Call, .{ .args_len = @intCast(args.len) }) catch unreachable; + l.air_extra.appendSliceAssumeCapacity(@ptrCast(args)); + return l.replaceInst(orig_inst, .legalize_compiler_rt_call, .{ .legalize_compiler_rt_call = .{ + .func = func, + .payload = payload, + } }); + } + + // We need to bitcast the result to an "alias" type (e.g. c_int/i32, c_longdouble/f128). + + assert(func_ret_ty.bitSize(zcu) == result_ty.bitSize(zcu)); + + var inst_buf: [3]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); + + const call_inst = try main_block.addCompilerRtCall(l, func, args); + const casted_result = main_block.addBitCast(l, result_ty, call_inst.toRef()); + main_block.addBr(l, orig_inst, casted_result); + + return l.replaceInst(orig_inst, .block, .{ .ty_pl = .{ + .ty = .fromType(result_ty), + .payload = try l.addBlockBody(main_block.body()), + } }); +} + +fn softFptruncFunc(l: *const Legalize, src_ty: Type, dst_ty: Type) Air.CompilerRtFunc { + const target = l.pt.zcu.getTarget(); + const src_bits = src_ty.floatBits(target); + const dst_bits = dst_ty.floatBits(target); + assert(dst_bits < src_bits); + const to_f16_func: Air.CompilerRtFunc = switch (src_bits) { + 128 => .__trunctfhf2, + 80 => .__truncxfhf2, + 64 => .__truncdfhf2, + 32 => .__truncsfhf2, + else => unreachable, + }; + const offset: u8 = switch (dst_bits) { + 16 => 0, + 32 => 1, + 64 => 2, + 80 => 3, + else => unreachable, + }; + return @enumFromInt(@intFromEnum(to_f16_func) + offset); +} +fn softFpextFunc(l: *const Legalize, src_ty: Type, dst_ty: Type) Air.CompilerRtFunc { + const target = l.pt.zcu.getTarget(); + const src_bits = src_ty.floatBits(target); + const dst_bits = dst_ty.floatBits(target); + assert(dst_bits > src_bits); + const to_f128_func: Air.CompilerRtFunc = switch (src_bits) { + 16 => .__extendhftf2, + 32 => .__extendsftf2, + 64 => .__extenddftf2, + 80 => .__extendxftf2, + else => unreachable, + }; + const offset: u8 = switch (dst_bits) { + 128 => 0, + 80 => 1, + 64 => 2, + 32 => 3, + else => unreachable, + }; + return @enumFromInt(@intFromEnum(to_f128_func) + offset); +} +fn softFloatFromInt(l: *Legalize, orig_inst: Air.Inst.Index) Error!union(enum) { + call: Air.CompilerRtFunc, + block_payload: Air.Inst.Data, +} { + const pt = l.pt; + const zcu = pt.zcu; + const target = zcu.getTarget(); + + const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; + const dest_ty = ty_op.ty.toType(); + const src_ty = l.typeOf(ty_op.operand); + + const src_info = src_ty.intInfo(zcu); + const float_off: u32 = switch (dest_ty.floatBits(target)) { + 16 => 0, + 32 => 1, + 64 => 2, + 80 => 3, + 128 => 4, + else => unreachable, + }; + const base: Air.CompilerRtFunc = switch (src_info.signedness) { + .signed => .__floatsihf, + .unsigned => .__floatunsihf, + }; + fixed: { + const extended_int_bits: u16, const int_bits_off: u32 = switch (src_info.bits) { + 0...32 => .{ 32, 0 }, + 33...64 => .{ 
64, 5 }, + 65...128 => .{ 128, 10 }, + else => break :fixed, + }; + // x86_64-windows uses an odd callconv for 128-bit integers, so we use the + // arbitrary-precision routine in that case for simplicity. + if (target.cpu.arch == .x86_64 and target.os.tag == .windows and extended_int_bits == 128) { + break :fixed; + } + + const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + int_bits_off + float_off); + if (extended_int_bits == src_info.bits) return .{ .call = func }; + + // We need to emit a block which first sign/zero-extends to the right type and *then* calls + // the required routine. + const extended_ty = try l.pt.intType(src_info.signedness, extended_int_bits); + + var inst_buf: [4]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + + const extended_val = main_block.addTyOp(l, .intcast, extended_ty, ty_op.operand).toRef(); + const call_inst = try main_block.addCompilerRtCall(l, func, &.{extended_val}); + const casted_result = main_block.addBitCast(l, dest_ty, call_inst.toRef()); + main_block.addBr(l, orig_inst, casted_result); + + return .{ .block_payload = .{ .ty_pl = .{ + .ty = .fromType(dest_ty), + .payload = try l.addBlockBody(main_block.body()), + } } }; + } + + // We need to emit a block which puts the integer into an `alloc` (possibly sign/zero-extended) + // and calls an arbitrary-width conversion routine. + + const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + 15 + float_off); + + // The extended integer routines expect the integer representation where the integer is + // effectively zero- or sign-extended to its ABI size. We represent that by intcasting to + // such an integer type and passing a pointer to *that*. + const extended_ty = try pt.intType(src_info.signedness, @intCast(src_ty.abiSize(zcu) * 8)); + assert(extended_ty.abiSize(zcu) == src_ty.abiSize(zcu)); + + var inst_buf: [6]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + + const extended_val: Air.Inst.Ref = if (extended_ty.toIntern() != src_ty.toIntern()) ext: { + break :ext main_block.addTyOp(l, .intcast, extended_ty, ty_op.operand).toRef(); + } else ext: { + _ = main_block.stealCapacity(1); + break :ext ty_op.operand; + }; + const extended_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(extended_ty)).toRef(); + _ = main_block.addBinOp(l, .store, extended_ptr, extended_val); + const bits_val = try pt.intValue(.usize, src_info.bits); + const call_inst = try main_block.addCompilerRtCall(l, func, &.{ extended_ptr, .fromValue(bits_val) }); + const casted_result = main_block.addBitCast(l, dest_ty, call_inst.toRef()); + main_block.addBr(l, orig_inst, casted_result); + + return .{ .block_payload = .{ .ty_pl = .{ + .ty = .fromType(dest_ty), + .payload = try l.addBlockBody(main_block.body()), + } } }; +} +fn softIntFromFloat(l: *Legalize, orig_inst: Air.Inst.Index) Error!union(enum) { + call: Air.CompilerRtFunc, + block_payload: Air.Inst.Data, +} { + const pt = l.pt; + const zcu = pt.zcu; + const target = zcu.getTarget(); + + const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; + const src_ty = l.typeOf(ty_op.operand); + const dest_ty = ty_op.ty.toType(); + + const dest_info = dest_ty.intInfo(zcu); + const float_off: u32 = switch (src_ty.floatBits(target)) { + 16 => 0, + 32 => 1, + 64 => 2, + 80 => 3, + 128 => 4, + else => unreachable, + }; + const base: Air.CompilerRtFunc = 
switch (dest_info.signedness) { + .signed => .__fixhfsi, + .unsigned => .__fixunshfsi, + }; + fixed: { + const extended_int_bits: u16, const int_bits_off: u32 = switch (dest_info.bits) { + 0...32 => .{ 32, 0 }, + 33...64 => .{ 64, 5 }, + 65...128 => .{ 128, 10 }, + else => break :fixed, + }; + // x86_64-windows uses an odd callconv for 128-bit integers, so we use the + // arbitrary-precision routine in that case for simplicity. + if (target.cpu.arch == .x86_64 and target.os.tag == .windows and extended_int_bits == 128) { + break :fixed; + } + + const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + int_bits_off + float_off); + if (extended_int_bits == dest_info.bits) return .{ .call = func }; + + // We need to emit a block which calls the routine and then casts to the required type. + + var inst_buf: [3]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + + const call_inst = try main_block.addCompilerRtCall(l, func, &.{ty_op.operand}); + const casted_val = main_block.addTyOp(l, .intcast, dest_ty, call_inst.toRef()).toRef(); + main_block.addBr(l, orig_inst, casted_val); + + return .{ .block_payload = .{ .ty_pl = .{ + .ty = .fromType(dest_ty), + .payload = try l.addBlockBody(main_block.body()), + } } }; + } + + // We need to emit a block which calls an arbitrary-width conversion routine, then loads the + // integer from an `alloc` and possibly truncates it. + const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + 15 + float_off); + + const extended_ty = try pt.intType(dest_info.signedness, @intCast(dest_ty.abiSize(zcu) * 8)); + assert(extended_ty.abiSize(zcu) == dest_ty.abiSize(zcu)); + + var inst_buf: [5]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + + const extended_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(extended_ty)).toRef(); + const bits_val = try pt.intValue(.usize, dest_info.bits); + _ = try main_block.addCompilerRtCall(l, func, &.{ extended_ptr, .fromValue(bits_val), ty_op.operand }); + const extended_val = main_block.addTyOp(l, .load, extended_ty, extended_ptr).toRef(); + const result_val = main_block.addTyOp(l, .intcast, dest_ty, extended_val).toRef(); + main_block.addBr(l, orig_inst, result_val); + + return .{ .block_payload = .{ .ty_pl = .{ + .ty = .fromType(dest_ty), + .payload = try l.addBlockBody(main_block.body()), + } } }; +} +fn softFloatFunc(op: Air.Inst.Tag, float_ty: Type, zcu: *const Zcu) Air.CompilerRtFunc { + const f16_func: Air.CompilerRtFunc = switch (op) { + .add, .add_optimized => .__addhf3, + .sub, .sub_optimized => .__subhf3, + .mul, .mul_optimized => .__mulhf3, + + .div_float, + .div_float_optimized, + .div_exact, + .div_exact_optimized, + => .__divhf3, + + .min => .__fminh, + .max => .__fmaxh, + + .ceil => .__ceilh, + .floor => .__floorh, + .trunc_float => .__trunch, + .round => .__roundh, + + .log => .__logh, + .log2 => .__log2h, + .log10 => .__log10h, + + .exp => .__exph, + .exp2 => .__exp2h, + + .sin => .__sinh, + .cos => .__cosh, + .tan => .__tanh, + + .abs => .__fabsh, + .sqrt => .__sqrth, + .rem, .rem_optimized => .__fmodh, + .mul_add => .__fmah, + + else => unreachable, + }; + const offset: u8 = switch (float_ty.floatBits(zcu.getTarget())) { + 16 => 0, + 32 => 1, + 64 => 2, + 80 => 3, + 128 => 4, + else => unreachable, + }; + return @enumFromInt(@intFromEnum(f16_func) + offset); +} + +fn softFloatNegBlockPayload( + l: 
*Legalize, + orig_inst: Air.Inst.Index, + operand: Air.Inst.Ref, +) Error!Air.Inst.Data { + const pt = l.pt; + const zcu = pt.zcu; + const gpa = zcu.gpa; + + const float_ty = l.typeOfIndex(orig_inst); + + const int_ty: Type, const sign_bit: Value = switch (float_ty.floatBits(zcu.getTarget())) { + 16 => .{ .u16, try pt.intValue(.u16, @as(u16, 1) << 15) }, + 32 => .{ .u32, try pt.intValue(.u32, @as(u32, 1) << 31) }, + 64 => .{ .u64, try pt.intValue(.u64, @as(u64, 1) << 63) }, + 80 => .{ .u80, try pt.intValue(.u80, @as(u80, 1) << 79) }, + 128 => .{ .u128, try pt.intValue(.u128, @as(u128, 1) << 127) }, + else => unreachable, + }; + + const sign_bit_ref: Air.Inst.Ref = .fromValue(sign_bit); + + var inst_buf: [4]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); + + const operand_as_int = main_block.addBitCast(l, int_ty, operand); + const result_as_int = main_block.addBinOp(l, .xor, operand_as_int, sign_bit_ref).toRef(); + const result = main_block.addBitCast(l, float_ty, result_as_int); + main_block.addBr(l, orig_inst, result); + + return .{ .ty_pl = .{ + .ty = .fromType(float_ty), + .payload = try l.addBlockBody(main_block.body()), + } }; +} + +fn softFloatDivTruncFloorBlockPayload( + l: *Legalize, + orig_inst: Air.Inst.Index, + lhs: Air.Inst.Ref, + rhs: Air.Inst.Ref, + air_tag: Air.Inst.Tag, +) Error!Air.Inst.Data { + const zcu = l.pt.zcu; + const gpa = zcu.gpa; + + const float_ty = l.typeOfIndex(orig_inst); + + const floor_tag: Air.Inst.Tag = switch (air_tag) { + .div_trunc, .div_trunc_optimized => .trunc_float, + .div_floor, .div_floor_optimized => .floor, + else => unreachable, + }; + + var inst_buf: [4]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); + + const div_inst = try main_block.addCompilerRtCall(l, softFloatFunc(.div_float, float_ty, zcu), &.{ lhs, rhs }); + const floor_inst = try main_block.addCompilerRtCall(l, softFloatFunc(floor_tag, float_ty, zcu), &.{div_inst.toRef()}); + const casted_result = main_block.addBitCast(l, float_ty, floor_inst.toRef()); + main_block.addBr(l, orig_inst, casted_result); + + return .{ .ty_pl = .{ + .ty = .fromType(float_ty), + .payload = try l.addBlockBody(main_block.body()), + } }; +} +fn softFloatModBlockPayload( + l: *Legalize, + orig_inst: Air.Inst.Index, + lhs: Air.Inst.Ref, + rhs: Air.Inst.Ref, +) Error!Air.Inst.Data { + const pt = l.pt; + const zcu = pt.zcu; + const gpa = zcu.gpa; + + const float_ty = l.typeOfIndex(orig_inst); + + var inst_buf: [10]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); + + const rem = try main_block.addCompilerRtCall(l, softFloatFunc(.rem, float_ty, zcu), &.{ lhs, rhs }); + const lhs_lt_zero = try main_block.addSoftFloatCmp(l, float_ty, .lt, lhs, .fromValue(try pt.floatValue(float_ty, 0.0))); + + var condbr: CondBr = .init(l, lhs_lt_zero, &main_block, .{}); + condbr.then_block = .init(main_block.stealRemainingCapacity()); + { + const add = try condbr.then_block.addCompilerRtCall(l, softFloatFunc(.add, float_ty, zcu), &.{ rem.toRef(), rhs }); + const inner_rem = try condbr.then_block.addCompilerRtCall(l, softFloatFunc(.rem, float_ty, zcu), &.{ add.toRef(), rhs }); + const casted_result = condbr.then_block.addBitCast(l, float_ty, inner_rem.toRef()); + condbr.then_block.addBr(l, orig_inst, casted_result); + } + condbr.else_block = 
.init(condbr.then_block.stealRemainingCapacity());
+    {
+        const casted_result = condbr.else_block.addBitCast(l, float_ty, rem.toRef());
+        condbr.else_block.addBr(l, orig_inst, casted_result);
+    }
+
+    try condbr.finish(l);
+
+    return .{ .ty_pl = .{
+        .ty = .fromType(float_ty),
+        .payload = try l.addBlockBody(main_block.body()),
+    } };
+}
+fn softFloatCmpBlockPayload(
+    l: *Legalize,
+    orig_inst: Air.Inst.Index,
+    float_ty: Type,
+    op: std.math.CompareOperator,
+    lhs: Air.Inst.Ref,
+    rhs: Air.Inst.Ref,
+) Error!Air.Inst.Data {
+    const pt = l.pt;
+    const gpa = pt.zcu.gpa;
+
+    var inst_buf: [3]Air.Inst.Index = undefined;
+    var main_block: Block = .init(&inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
+
+    const result = try main_block.addSoftFloatCmp(l, float_ty, op, lhs, rhs);
+    main_block.addBr(l, orig_inst, result);
+
+    return .{ .ty_pl = .{
+        .ty = .bool_type,
+        .payload = try l.addBlockBody(main_block.body()),
+    } };
+}
+
+/// `inline` to propagate potentially comptime-known return value.
+inline fn wantScalarizeOrSoftFloat(
+    l: *const Legalize,
+    comptime air_tag: Air.Inst.Tag,
+    ty: Type,
+) enum {
+    none,
+    scalarize,
+    soft_float,
+} {
+    const zcu = l.pt.zcu;
+    const is_vec, const scalar_ty = switch (ty.zigTypeTag(zcu)) {
+        .vector => .{ true, ty.childType(zcu) },
+        else => .{ false, ty },
+    };
+
+    if (is_vec and l.features.has(.scalarize(air_tag))) return .scalarize;
+
+    if (l.wantSoftFloatScalar(scalar_ty)) {
+        return if (is_vec) .scalarize else .soft_float;
+    }
+    return .none;
+}
+
+/// `inline` to propagate potentially comptime-known return value.
+inline fn wantSoftFloatScalar(l: *const Legalize, ty: Type) bool {
+    const zcu = l.pt.zcu;
+    return switch (ty.zigTypeTag(zcu)) {
+        .vector => unreachable,
+        .float => switch (ty.floatBits(zcu.getTarget())) {
+            16 => l.features.has(.soft_f16),
+            32 => l.features.has(.soft_f32),
+            64 => l.features.has(.soft_f64),
+            80 => l.features.has(.soft_f80),
+            128 => l.features.has(.soft_f128),
+            else => unreachable,
+        },
+        else => false,
+    };
+}
+
 const Air = @import("../Air.zig");
 const assert = std.debug.assert;
 const dev = @import("../dev.zig");
diff --git a/src/Air/Liveness.zig b/src/Air/Liveness.zig
index c60ece5e4f..44364465bb 100644
--- a/src/Air/Liveness.zig
+++ b/src/Air/Liveness.zig
@@ -776,6 +776,24 @@ fn analyzeInst(
             const bin = a.air.extraData(Air.Bin, pl_op.payload).data;
             return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, bin.lhs, bin.rhs });
         },
+
+        .legalize_compiler_rt_call => {
+            const extra = a.air.extraData(Air.Call, inst_datas[@intFromEnum(inst)].legalize_compiler_rt_call.payload);
+            const args: []const Air.Inst.Ref = @ptrCast(a.air.extra.items[extra.end..][0..extra.data.args_len]);
+            if (args.len <= bpi - 1) {
+                var buf: [bpi - 1]Air.Inst.Ref = @splat(.none);
+                @memcpy(buf[0..args.len], args);
+                return analyzeOperands(a, pass, data, inst, buf);
+            }
+            var big = try AnalyzeBigOperands(pass).init(a, data, inst, args.len + 1);
+            defer big.deinit();
+            var i: usize = args.len;
+            while (i > 0) {
+                i -= 1;
+                try big.feed(args[i]);
+            }
+            return big.finish();
+        },
     }
 }
diff --git a/src/Air/Liveness/Verify.zig b/src/Air/Liveness/Verify.zig
index f522e1367e..617ad5eaac 100644
--- a/src/Air/Liveness/Verify.zig
+++ b/src/Air/Liveness/Verify.zig
@@ -583,6 +583,15 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
                 const bin = self.air.extraData(Air.Bin, pl_op.payload).data;
                 try self.verifyInstOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs });
             },
+            .legalize_compiler_rt_call => {
+                const extra = self.air.extraData(Air.Call, data[@intFromEnum(inst)].legalize_compiler_rt_call.payload);
+                const args: []const Air.Inst.Ref = @ptrCast(self.air.extra.items[extra.end..][0..extra.data.args_len]);
+                var bt = self.liveness.iterateBigTomb(inst);
+                for (args) |arg| {
+                    try self.verifyOperand(inst, arg, bt.feed());
+                }
+                try self.verifyInst(inst);
+            },
         }
     }
 }
diff --git a/src/Air/print.zig b/src/Air/print.zig
index 3324055dc7..ba167bbfd9 100644
--- a/src/Air/print.zig
+++ b/src/Air/print.zig
@@ -333,6 +333,7 @@ const Writer = struct {
             .cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
             .runtime_nav_ptr => try w.writeRuntimeNavPtr(s, inst),
             .legalize_vec_store_elem => try w.writeLegalizeVecStoreElem(s, inst),
+            .legalize_compiler_rt_call => try w.writeLegalizeCompilerRtCall(s, inst),
 
             .work_item_id,
             .work_group_size,
@@ -522,6 +523,19 @@ const Writer = struct {
         try s.writeAll(", ");
     }
 
+    fn writeLegalizeCompilerRtCall(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
+        const inst_data = w.air.instructions.items(.data)[@intFromEnum(inst)].legalize_compiler_rt_call;
+        const extra = w.air.extraData(Air.Call, inst_data.payload);
+        const args: []const Air.Inst.Ref = @ptrCast(w.air.extra.items[extra.end..][0..extra.data.args_len]);
+
+        try s.print("{t}, [", .{inst_data.func});
+        for (args, 0..) |arg, i| {
+            if (i != 0) try s.writeAll(", ");
+            try w.writeOperand(s, inst, i, arg);
+        }
+        try s.writeByte(']');
+    }
+
     fn writeShuffleOne(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
         const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst);
         try w.writeType(s, unwrapped.result_ty);
diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig
index d90550982d..752b4eccc3 100644
--- a/src/Air/types_resolved.zig
+++ b/src/Air/types_resolved.zig
@@ -418,6 +418,12 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
                 for (inputs) |input| if (input != .none and !checkRef(input, zcu)) return false;
             },
 
+            .legalize_compiler_rt_call => {
+                const extra = air.extraData(Air.Call, data.legalize_compiler_rt_call.payload);
+                const args: []const Air.Inst.Ref = @ptrCast(air.extra.items[extra.end..][0..extra.data.args_len]);
+                for (args) |arg| if (!checkRef(arg, zcu)) return false;
+            },
+
             .trap,
             .breakpoint,
             .ret_addr,
diff --git a/src/codegen/aarch64/Select.zig b/src/codegen/aarch64/Select.zig
index 64aeeb7ff4..b19f6f77cc 100644
--- a/src/codegen/aarch64/Select.zig
+++ b/src/codegen/aarch64/Select.zig
@@ -137,6 +137,8 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
         // No "scalarize" legalizations are enabled, so these instructions never appear.
         .legalize_vec_elem_val => unreachable,
         .legalize_vec_store_elem => unreachable,
+        // No soft float legalizations are enabled.
+        .legalize_compiler_rt_call => unreachable,
 
         .arg,
         .ret_addr,
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index e3b33beb14..c4b909d4a9 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -3328,6 +3328,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void {
             // No "scalarize" legalizations are enabled, so these instructions never appear.
             .legalize_vec_elem_val => unreachable,
             .legalize_vec_store_elem => unreachable,
+            // No soft float legalizations are enabled.
+            .legalize_compiler_rt_call => unreachable,
 
             .arg => try airArg(f, inst),
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 7c7151524a..0df4cbc3d4 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -4889,6 +4889,8 @@ pub const FuncGen = struct {
             // No "scalarize" legalizations are enabled, so these instructions never appear.
             .legalize_vec_elem_val => unreachable,
             .legalize_vec_store_elem => unreachable,
+            // No soft float legalizations are enabled.
+            .legalize_compiler_rt_call => unreachable,
 
             .add => try self.airAdd(inst, .normal),
             .add_optimized => try self.airAdd(inst, .fast),
@@ -6670,7 +6672,9 @@ pub const FuncGen = struct {
                 "",
             );
 
-        const rt_int_bits = compilerRtIntBits(@intCast(operand_scalar_ty.bitSize(zcu)));
+        const rt_int_bits = compilerRtIntBits(@intCast(operand_scalar_ty.bitSize(zcu))) orelse {
+            return self.todo("float_from_int from '{f}' without intrinsics", .{operand_scalar_ty.fmt(pt)});
+        };
         const rt_int_ty = try o.builder.intType(rt_int_bits);
         var extended = try self.wip.conv(
             if (is_signed_int) .signed else .unsigned,
@@ -6739,7 +6743,9 @@ pub const FuncGen = struct {
             );
         }
 
-        const rt_int_bits = compilerRtIntBits(@intCast(dest_scalar_ty.bitSize(zcu)));
+        const rt_int_bits = compilerRtIntBits(@intCast(dest_scalar_ty.bitSize(zcu))) orelse {
+            return self.todo("int_from_float to '{f}' without intrinsics", .{dest_scalar_ty.fmt(pt)});
+        };
         const ret_ty = try o.builder.intType(rt_int_bits);
         const libc_ret_ty = if (rt_int_bits == 128 and (target.os.tag == .windows and target.cpu.arch == .x86_64)) b: {
             // On Windows x86-64, "ti" functions must use Vector(2, u64) instead of the standard
@@ -12823,13 +12829,13 @@ const optional_layout_version = 3;
 
 const lt_errors_fn_name = "__zig_lt_errors_len";
 
-fn compilerRtIntBits(bits: u16) u16 {
+fn compilerRtIntBits(bits: u16) ?u16 {
     inline for (.{ 32, 64, 128 }) |b| {
         if (bits <= b) {
             return b;
         }
     }
-    return bits;
+    return null;
 }
 
 fn buildAllocaInner(
diff --git a/src/codegen/riscv64/CodeGen.zig b/src/codegen/riscv64/CodeGen.zig
index cdca3c2fd8..ac176c4780 100644
--- a/src/codegen/riscv64/CodeGen.zig
+++ b/src/codegen/riscv64/CodeGen.zig
@@ -1395,6 +1395,8 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
             // No "scalarize" legalizations are enabled, so these instructions never appear.
             .legalize_vec_elem_val => unreachable,
             .legalize_vec_store_elem => unreachable,
+            // No soft float legalizations are enabled.
+            .legalize_compiler_rt_call => unreachable,
 
             .add,
             .add_wrap,
diff --git a/src/codegen/sparc64/CodeGen.zig b/src/codegen/sparc64/CodeGen.zig
index 4cbe07c762..c681137bae 100644
--- a/src/codegen/sparc64/CodeGen.zig
+++ b/src/codegen/sparc64/CodeGen.zig
@@ -483,6 +483,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
            // No "scalarize" legalizations are enabled, so these instructions never appear.
            .legalize_vec_elem_val => unreachable,
            .legalize_vec_store_elem => unreachable,
+           // No soft float legalizations are enabled.
+           .legalize_compiler_rt_call => unreachable,
 
            .ptr_add => try self.airPtrArithmetic(inst, .ptr_add),
            .ptr_sub => try self.airPtrArithmetic(inst, .ptr_sub),
diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig
index 684513bf82..1d83474cc3 100644
--- a/src/codegen/wasm/CodeGen.zig
+++ b/src/codegen/wasm/CodeGen.zig
@@ -1789,6 +1789,8 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         // No "scalarize" legalizations are enabled, so these instructions never appear.
         .legalize_vec_elem_val => unreachable,
         .legalize_vec_store_elem => unreachable,
+        // No soft float legalizations are enabled.
+        .legalize_compiler_rt_call => unreachable,
 
         .inferred_alloc, .inferred_alloc_comptime => unreachable,
diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig
index f0772dcd73..b43b359de1 100644
--- a/src/codegen/x86_64/CodeGen.zig
+++ b/src/codegen/x86_64/CodeGen.zig
@@ -173689,6 +173689,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 };
                 for (ops) |op| try op.die(cg);
             },
+
+            // No soft-float `Legalize` features are enabled, so this instruction never appears.
+            .legalize_compiler_rt_call => unreachable,
+
             .work_item_id, .work_group_size, .work_group_id => unreachable,
         }
         try cg.resetTemps(@enumFromInt(0));
diff --git a/src/target.zig b/src/target.zig
index 66aba7e5cd..a721b0bf65 100644
--- a/src/target.zig
+++ b/src/target.zig
@@ -842,7 +842,7 @@ pub fn compilerRtIntAbbrev(bits: u16) []const u8 {
         32 => "s",
         64 => "d",
         128 => "t",
-        else => "o", // Non-standard
+        else => unreachable,
     };
 }
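
For a backend that does enable the soft-float `Legalize` features, lowering follows the same decoding pattern used by the Liveness, Verify, and print changes above: the instruction's `payload` indexes an `Air.Call` record in `extra`, and the argument refs are stored immediately after it. The sketch below is illustrative only and is not part of this change set; `airLegalizeCompilerRtCall`, `CodeGen`, `cg.air`, `cg.target`, and `genLibCall` are hypothetical stand-ins for a real backend's helpers.

// Sketch only (assumed backend plumbing): how a backend with soft-float
// legalizations enabled might lower `.legalize_compiler_rt_call`.
fn airLegalizeCompilerRtCall(cg: *CodeGen, inst: Air.Inst.Index) !void {
    const inst_data = cg.air.instructions.items(.data)[@intFromEnum(inst)].legalize_compiler_rt_call;
    // Same decoding as in Liveness/Verify/print above: an `Air.Call` payload
    // followed by `args_len` operand refs in `extra`.
    const extra = cg.air.extraData(Air.Call, inst_data.payload);
    const args: []const Air.Inst.Ref = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.args_len]);
    // The callee symbol depends on the `CompilerRtFunc` tag and the target;
    // `genLibCall` is a hypothetical helper that emits the external call.
    const callee_name = inst_data.func.name(cg.target);
    try cg.genLibCall(inst, callee_name, args);
}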