mirror of https://github.com/ziglang/zig.git
synced 2026-02-12 20:37:54 +00:00

Merge pull request #13910 from Luukdegram/wasm-simd
commit 2e66b3be6e

lib/std/wasm.zig | 272
lib/std/wasm.zig
@@ -237,6 +237,277 @@ pub const PrefixedOpcode = enum(u8) {
     _,
 };
 
+/// Simd opcodes that require a prefix `0xFD`.
+/// Each opcode represents a varuint32, meaning
+/// they are encoded as leb128 in binary.
+pub const SimdOpcode = enum(u32) {
+    v128_load = 0x00,
+    v128_load8x8_s = 0x01,
+    v128_load8x8_u = 0x02,
+    v128_load16x4_s = 0x03,
+    v128_load16x4_u = 0x04,
+    v128_load32x2_s = 0x05,
+    v128_load32x2_u = 0x06,
+    v128_load8_splat = 0x07,
+    v128_load16_splat = 0x08,
+    v128_load32_splat = 0x09,
+    v128_load64_splat = 0x0A,
+    v128_store = 0x0B,
+    v128_const = 0x0C,
+    i8x16_shuffle = 0x0D,
+    i8x16_swizzle = 0x0E,
+    i8x16_splat = 0x0F,
+    i16x8_splat = 0x10,
+    i32x4_splat = 0x11,
+    i64x2_splat = 0x12,
+    f32x4_splat = 0x13,
+    f64x2_splat = 0x14,
+    i8x16_extract_lane_s = 0x15,
+    i8x16_extract_lane_u = 0x16,
+    i8x16_replace_lane = 0x17,
+    i16x8_extract_lane_s = 0x18,
+    i16x8_extract_lane_u = 0x19,
+    i16x8_replace_lane = 0x1A,
+    i32x4_extract_lane = 0x1B,
+    i32x4_replace_lane = 0x1C,
+    i64x2_extract_lane = 0x1D,
+    i64x2_replace_lane = 0x1E,
+    f32x4_extract_lane = 0x1F,
+    f32x4_replace_lane = 0x20,
+    f64x2_extract_lane = 0x21,
+    f64x2_replace_lane = 0x22,
+    i8x16_eq = 0x23,
+    i16x8_eq = 0x2D,
+    i32x4_eq = 0x37,
+    i8x16_ne = 0x24,
+    i16x8_ne = 0x2E,
+    i32x4_ne = 0x38,
+    i8x16_lt_s = 0x25,
+    i16x8_lt_s = 0x2F,
+    i32x4_lt_s = 0x39,
+    i8x16_lt_u = 0x26,
+    i16x8_lt_u = 0x30,
+    i32x4_lt_u = 0x3A,
+    i8x16_gt_s = 0x27,
+    i16x8_gt_s = 0x31,
+    i32x4_gt_s = 0x3B,
+    i8x16_gt_u = 0x28,
+    i16x8_gt_u = 0x32,
+    i32x4_gt_u = 0x3C,
+    i8x16_le_s = 0x29,
+    i16x8_le_s = 0x33,
+    i32x4_le_s = 0x3D,
+    i8x16_le_u = 0x2A,
+    i16x8_le_u = 0x34,
+    i32x4_le_u = 0x3E,
+    i8x16_ge_s = 0x2B,
+    i16x8_ge_s = 0x35,
+    i32x4_ge_s = 0x3F,
+    i8x16_ge_u = 0x2C,
+    i16x8_ge_u = 0x36,
+    i32x4_ge_u = 0x40,
+    f32x4_eq = 0x41,
+    f64x2_eq = 0x47,
+    f32x4_ne = 0x42,
+    f64x2_ne = 0x48,
+    f32x4_lt = 0x43,
+    f64x2_lt = 0x49,
+    f32x4_gt = 0x44,
+    f64x2_gt = 0x4A,
+    f32x4_le = 0x45,
+    f64x2_le = 0x4B,
+    f32x4_ge = 0x46,
+    f64x2_ge = 0x4C,
+    v128_not = 0x4D,
+    v128_and = 0x4E,
+    v128_andnot = 0x4F,
+    v128_or = 0x50,
+    v128_xor = 0x51,
+    v128_bitselect = 0x52,
+    v128_any_true = 0x53,
+    v128_load8_lane = 0x54,
+    v128_load16_lane = 0x55,
+    v128_load32_lane = 0x56,
+    v128_load64_lane = 0x57,
+    v128_store8_lane = 0x58,
+    v128_store16_lane = 0x59,
+    v128_store32_lane = 0x5A,
+    v128_store64_lane = 0x5B,
+    v128_load32_zero = 0x5C,
+    v128_load64_zero = 0x5D,
+    f32x4_demote_f64x2_zero = 0x5E,
+    f64x2_promote_low_f32x4 = 0x5F,
+    i8x16_abs = 0x60,
+    i16x8_abs = 0x80,
+    i32x4_abs = 0xA0,
+    i64x2_abs = 0xC0,
+    i8x16_neg = 0x61,
+    i16x8_neg = 0x81,
+    i32x4_neg = 0xA1,
+    i64x2_neg = 0xC1,
+    i8x16_popcnt = 0x62,
+    i16x8_q15mulr_sat_s = 0x82,
+    i8x16_all_true = 0x63,
+    i16x8_all_true = 0x83,
+    i32x4_all_true = 0xA3,
+    i64x2_all_true = 0xC3,
+    i8x16_bitmask = 0x64,
+    i16x8_bitmask = 0x84,
+    i32x4_bitmask = 0xA4,
+    i64x2_bitmask = 0xC4,
+    i8x16_narrow_i16x8_s = 0x65,
+    i16x8_narrow_i32x4_s = 0x85,
+    i8x16_narrow_i16x8_u = 0x66,
+    i16x8_narrow_i32x4_u = 0x86,
+    f32x4_ceil = 0x67,
+    i16x8_extend_low_i8x16_s = 0x87,
+    i32x4_extend_low_i16x8_s = 0xA7,
+    i64x2_extend_low_i32x4_s = 0xC7,
+    f32x4_floor = 0x68,
+    i16x8_extend_high_i8x16_s = 0x88,
+    i32x4_extend_high_i16x8_s = 0xA8,
+    i64x2_extend_high_i32x4_s = 0xC8,
+    f32x4_trunc = 0x69,
+    i16x8_extend_low_i8x16_u = 0x89,
+    i32x4_extend_low_i16x8_u = 0xA9,
+    i64x2_extend_low_i32x4_u = 0xC9,
+    f32x4_nearest = 0x6A,
+    i16x8_extend_high_i8x16_u = 0x8A,
+    i32x4_extend_high_i16x8_u = 0xAA,
+    i64x2_extend_high_i32x4_u = 0xCA,
+    i8x16_shl = 0x6B,
+    i16x8_shl = 0x8B,
+    i32x4_shl = 0xAB,
+    i64x2_shl = 0xCB,
+    i8x16_shr_s = 0x6C,
+    i16x8_shr_s = 0x8C,
+    i32x4_shr_s = 0xAC,
+    i64x2_shr_s = 0xCC,
+    i8x16_shr_u = 0x6D,
+    i16x8_shr_u = 0x8D,
+    i32x4_shr_u = 0xAD,
+    i64x2_shr_u = 0xCD,
+    i8x16_add = 0x6E,
+    i16x8_add = 0x8E,
+    i32x4_add = 0xAE,
+    i64x2_add = 0xCE,
+    i8x16_add_sat_s = 0x6F,
+    i16x8_add_sat_s = 0x8F,
+    i8x16_add_sat_u = 0x70,
+    i16x8_add_sat_u = 0x90,
+    i8x16_sub = 0x71,
+    i16x8_sub = 0x91,
+    i32x4_sub = 0xB1,
+    i64x2_sub = 0xD1,
+    i8x16_sub_sat_s = 0x72,
+    i16x8_sub_sat_s = 0x92,
+    i8x16_sub_sat_u = 0x73,
+    i16x8_sub_sat_u = 0x93,
+    f64x2_ceil = 0x74,
+    f64x2_nearest = 0x94,
+    f64x2_floor = 0x75,
+    i16x8_mul = 0x95,
+    i32x4_mul = 0xB5,
+    i64x2_mul = 0xD5,
+    i8x16_min_s = 0x76,
+    i16x8_min_s = 0x96,
+    i32x4_min_s = 0xB6,
+    i64x2_eq = 0xD6,
+    i8x16_min_u = 0x77,
+    i16x8_min_u = 0x97,
+    i32x4_min_u = 0xB7,
+    i64x2_ne = 0xD7,
+    i8x16_max_s = 0x78,
+    i16x8_max_s = 0x98,
+    i32x4_max_s = 0xB8,
+    i64x2_lt_s = 0xD8,
+    i8x16_max_u = 0x79,
+    i16x8_max_u = 0x99,
+    i32x4_max_u = 0xB9,
+    i64x2_gt_s = 0xD9,
+    f64x2_trunc = 0x7A,
+    i32x4_dot_i16x8_s = 0xBA,
+    i64x2_le_s = 0xDA,
+    i8x16_avgr_u = 0x7B,
+    i16x8_avgr_u = 0x9B,
+    i64x2_ge_s = 0xDB,
+    i16x8_extadd_pairwise_i8x16_s = 0x7C,
+    i16x8_extmul_low_i8x16_s = 0x9C,
+    i32x4_extmul_low_i16x8_s = 0xBC,
+    i64x2_extmul_low_i32x4_s = 0xDC,
+    i16x8_extadd_pairwise_i8x16_u = 0x7D,
+    i16x8_extmul_high_i8x16_s = 0x9D,
+    i32x4_extmul_high_i16x8_s = 0xBD,
+    i64x2_extmul_high_i32x4_s = 0xDD,
+    i32x4_extadd_pairwise_i16x8_s = 0x7E,
+    i16x8_extmul_low_i8x16_u = 0x9E,
+    i32x4_extmul_low_i16x8_u = 0xBE,
+    i64x2_extmul_low_i32x4_u = 0xDE,
+    i32x4_extadd_pairwise_i16x8_u = 0x7F,
+    i16x8_extmul_high_i8x16_u = 0x9F,
+    i32x4_extmul_high_i16x8_u = 0xBF,
+    i64x2_extmul_high_i32x4_u = 0xDF,
+    f32x4_abs = 0xE0,
+    f64x2_abs = 0xEC,
+    f32x4_neg = 0xE1,
+    f64x2_neg = 0xED,
+    f32x4_sqrt = 0xE3,
+    f64x2_sqrt = 0xEF,
+    f32x4_add = 0xE4,
+    f64x2_add = 0xF0,
+    f32x4_sub = 0xE5,
+    f64x2_sub = 0xF1,
+    f32x4_mul = 0xE6,
+    f64x2_mul = 0xF2,
+    f32x4_div = 0xE7,
+    f64x2_div = 0xF3,
+    f32x4_min = 0xE8,
+    f64x2_min = 0xF4,
+    f32x4_max = 0xE9,
+    f64x2_max = 0xF5,
+    f32x4_pmin = 0xEA,
+    f64x2_pmin = 0xF6,
+    f32x4_pmax = 0xEB,
+    f64x2_pmax = 0xF7,
+    i32x4_trunc_sat_f32x4_s = 0xF8,
+    i32x4_trunc_sat_f32x4_u = 0xF9,
+    f32x4_convert_i32x4_s = 0xFA,
+    f32x4_convert_i32x4_u = 0xFB,
+    i32x4_trunc_sat_f64x2_s_zero = 0xFC,
+    i32x4_trunc_sat_f64x2_u_zero = 0xFD,
+    f64x2_convert_low_i32x4_s = 0xFE,
+    f64x2_convert_low_i32x4_u = 0xFF,
+
+    // relaxed-simd opcodes
+    i8x16_relaxed_swizzle = 0x100,
+    i32x4_relaxed_trunc_f32x4_s = 0x101,
+    i32x4_relaxed_trunc_f32x4_u = 0x102,
+    i32x4_relaxed_trunc_f64x2_s_zero = 0x103,
+    i32x4_relaxed_trunc_f64x2_u_zero = 0x104,
+    f32x4_relaxed_madd = 0x105,
+    f32x4_relaxed_nmadd = 0x106,
+    f64x2_relaxed_madd = 0x107,
+    f64x2_relaxed_nmadd = 0x108,
+    i8x16_relaxed_laneselect = 0x109,
+    i16x8_relaxed_laneselect = 0x10a,
+    i32x4_relaxed_laneselect = 0x10b,
+    i64x2_relaxed_laneselect = 0x10c,
+    f32x4_relaxed_min = 0x10d,
+    f32x4_relaxed_max = 0x10e,
+    f64x2_relaxed_min = 0x10f,
+    f64x2_relaxed_max = 0x110,
+    i16x8_relaxed_q15mulr_s = 0x111,
+    i16x8_relaxed_dot_i8x16_i7x16_s = 0x112,
+    i32x4_relaxed_dot_i8x16_i7x16_add_s = 0x113,
+    f32x4_relaxed_dot_bf16x8_add_f32x4 = 0x114,
+};
+
+/// Returns the integer value of an `SimdOpcode`. Used by the Zig compiler
+/// to write instructions to the wasm binary file
+pub fn simdOpcode(op: SimdOpcode) u32 {
+    return @enumToInt(op);
+}
+
 /// Enum representing all Wasm value types as per spec:
 /// https://webassembly.github.io/spec/core/binary/types.html
 pub const Valtype = enum(u8) {
@@ -244,6 +515,7 @@ pub const Valtype = enum(u8) {
     i64 = 0x7E,
     f32 = 0x7D,
     f64 = 0x7C,
+    v128 = 0x7B,
 };
 
 /// Returns the integer value of a `Valtype`
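As the doc comment on SimdOpcode notes, every SIMD instruction is serialized as the
0xFD prefix byte followed by the opcode as an unsigned LEB128 varuint32. A minimal
standalone sketch of that encoding (writeSimdOpcode is a name invented here for
illustration; std.leb and the SimdOpcode enum added above are real std APIs):

    const std = @import("std");

    fn writeSimdOpcode(writer: anytype, op: std.wasm.SimdOpcode) !void {
        try writer.writeByte(0xFD); // prefix shared by all SIMD instructions
        try std.leb.writeULEB128(writer, @enumToInt(op)); // opcode as varuint32
    }

    test "simd opcodes encode as 0xFD + uleb128" {
        var buf = std.ArrayList(u8).init(std.testing.allocator);
        defer buf.deinit();
        try writeSimdOpcode(buf.writer(), .v128_load); // 0x00 fits in one byte
        try writeSimdOpcode(buf.writer(), .i8x16_relaxed_swizzle); // 0x100 needs two
        try std.testing.expectEqualSlices(u8, &.{ 0xFD, 0x00, 0xFD, 0x80, 0x02 }, buf.items);
    }

This is also why the enum is backed by u32 rather than u8: the relaxed-simd opcodes
start at 0x100 and no longer fit in a single byte.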
src/arch/wasm/CodeGen.zig
@@ -43,6 +43,10 @@ const WValue = union(enum) {
     imm32: u32,
     /// An immediate 64bit value
     imm64: u64,
+    /// Index into the list of simd128 immediates. This `WValue` is
+    /// only possible in very rare cases, therefore it would be
+    /// a waste of memory to store the value in a 128 bit integer.
+    imm128: u32,
     /// A constant 32bit float value
     float32: f32,
    /// A constant 64bit float value
@@ -116,6 +120,7 @@ const WValue = union(enum) {
             .i64 => gen.free_locals_i64.append(gen.gpa, local_value) catch return,
             .f32 => gen.free_locals_f32.append(gen.gpa, local_value) catch return,
             .f64 => gen.free_locals_f64.append(gen.gpa, local_value) catch return,
+            .v128 => gen.free_locals_v128.append(gen.gpa, local_value) catch return,
         }
         value.* = undefined;
     }
@@ -258,18 +263,18 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             8 => switch (args.valtype1.?) {
                 .i32 => if (args.signedness.? == .signed) return .i32_load8_s else return .i32_load8_u,
                 .i64 => if (args.signedness.? == .signed) return .i64_load8_s else return .i64_load8_u,
-                .f32, .f64 => unreachable,
+                .f32, .f64, .v128 => unreachable,
             },
             16 => switch (args.valtype1.?) {
                 .i32 => if (args.signedness.? == .signed) return .i32_load16_s else return .i32_load16_u,
                 .i64 => if (args.signedness.? == .signed) return .i64_load16_s else return .i64_load16_u,
-                .f32, .f64 => unreachable,
+                .f32, .f64, .v128 => unreachable,
             },
             32 => switch (args.valtype1.?) {
                 .i64 => if (args.signedness.? == .signed) return .i64_load32_s else return .i64_load32_u,
                 .i32 => return .i32_load,
                 .f32 => return .f32_load,
-                .f64 => unreachable,
+                .f64, .v128 => unreachable,
             },
             64 => switch (args.valtype1.?) {
                 .i64 => return .i64_load,
@@ -282,24 +287,25 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             .i64 => return .i64_load,
             .f32 => return .f32_load,
             .f64 => return .f64_load,
+            .v128 => unreachable, // handled independently
         },
         .store => if (args.width) |width| {
             switch (width) {
                 8 => switch (args.valtype1.?) {
                     .i32 => return .i32_store8,
                     .i64 => return .i64_store8,
-                    .f32, .f64 => unreachable,
+                    .f32, .f64, .v128 => unreachable,
                 },
                 16 => switch (args.valtype1.?) {
                     .i32 => return .i32_store16,
                     .i64 => return .i64_store16,
-                    .f32, .f64 => unreachable,
+                    .f32, .f64, .v128 => unreachable,
                 },
                 32 => switch (args.valtype1.?) {
                     .i64 => return .i64_store32,
                     .i32 => return .i32_store,
                     .f32 => return .f32_store,
-                    .f64 => unreachable,
+                    .f64, .v128 => unreachable,
                 },
                 64 => switch (args.valtype1.?) {
                     .i64 => return .i64_store,
@@ -314,6 +320,7 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             .i64 => return .i64_store,
             .f32 => return .f32_store,
             .f64 => return .f64_store,
+            .v128 => unreachable, // handled independently
             }
         },
 
@@ -325,24 +332,27 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             .i64 => return .i64_const,
             .f32 => return .f32_const,
             .f64 => return .f64_const,
+            .v128 => unreachable, // handled independently
         },
 
         .eqz => switch (args.valtype1.?) {
             .i32 => return .i32_eqz,
             .i64 => return .i64_eqz,
-            .f32, .f64 => unreachable,
+            .f32, .f64, .v128 => unreachable,
         },
         .eq => switch (args.valtype1.?) {
             .i32 => return .i32_eq,
             .i64 => return .i64_eq,
             .f32 => return .f32_eq,
             .f64 => return .f64_eq,
+            .v128 => unreachable, // handled independently
        },
         .ne => switch (args.valtype1.?) {
             .i32 => return .i32_ne,
             .i64 => return .i64_ne,
             .f32 => return .f32_ne,
             .f64 => return .f64_ne,
+            .v128 => unreachable, // handled independently
         },
 
         .lt => switch (args.valtype1.?) {
@@ -350,40 +360,47 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             .i64 => if (args.signedness.? == .signed) return .i64_lt_s else return .i64_lt_u,
             .f32 => return .f32_lt,
             .f64 => return .f64_lt,
+            .v128 => unreachable, // handled independently
         },
         .gt => switch (args.valtype1.?) {
             .i32 => if (args.signedness.? == .signed) return .i32_gt_s else return .i32_gt_u,
             .i64 => if (args.signedness.? == .signed) return .i64_gt_s else return .i64_gt_u,
             .f32 => return .f32_gt,
             .f64 => return .f64_gt,
+            .v128 => unreachable, // handled independently
         },
         .le => switch (args.valtype1.?) {
             .i32 => if (args.signedness.? == .signed) return .i32_le_s else return .i32_le_u,
             .i64 => if (args.signedness.? == .signed) return .i64_le_s else return .i64_le_u,
             .f32 => return .f32_le,
             .f64 => return .f64_le,
+            .v128 => unreachable, // handled independently
         },
         .ge => switch (args.valtype1.?) {
             .i32 => if (args.signedness.? == .signed) return .i32_ge_s else return .i32_ge_u,
             .i64 => if (args.signedness.? == .signed) return .i64_ge_s else return .i64_ge_u,
             .f32 => return .f32_ge,
             .f64 => return .f64_ge,
+            .v128 => unreachable, // handled independently
         },
 
         .clz => switch (args.valtype1.?) {
             .i32 => return .i32_clz,
             .i64 => return .i64_clz,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
         .ctz => switch (args.valtype1.?) {
             .i32 => return .i32_ctz,
             .i64 => return .i64_ctz,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
         .popcnt => switch (args.valtype1.?) {
             .i32 => return .i32_popcnt,
             .i64 => return .i64_popcnt,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
 
         .add => switch (args.valtype1.?) {
@@ -391,18 +408,21 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             .i64 => return .i64_add,
             .f32 => return .f32_add,
             .f64 => return .f64_add,
+            .v128 => unreachable, // handled independently
         },
         .sub => switch (args.valtype1.?) {
             .i32 => return .i32_sub,
             .i64 => return .i64_sub,
             .f32 => return .f32_sub,
             .f64 => return .f64_sub,
+            .v128 => unreachable, // handled independently
         },
         .mul => switch (args.valtype1.?) {
             .i32 => return .i32_mul,
             .i64 => return .i64_mul,
             .f32 => return .f32_mul,
             .f64 => return .f64_mul,
+            .v128 => unreachable, // handled independently
         },
 
         .div => switch (args.valtype1.?) {
@@ -410,71 +430,84 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             .i64 => if (args.signedness.? == .signed) return .i64_div_s else return .i64_div_u,
             .f32 => return .f32_div,
             .f64 => return .f64_div,
+            .v128 => unreachable, // handled independently
         },
         .rem => switch (args.valtype1.?) {
             .i32 => if (args.signedness.? == .signed) return .i32_rem_s else return .i32_rem_u,
             .i64 => if (args.signedness.? == .signed) return .i64_rem_s else return .i64_rem_u,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
 
         .@"and" => switch (args.valtype1.?) {
             .i32 => return .i32_and,
             .i64 => return .i64_and,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
         .@"or" => switch (args.valtype1.?) {
             .i32 => return .i32_or,
             .i64 => return .i64_or,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
         .xor => switch (args.valtype1.?) {
             .i32 => return .i32_xor,
             .i64 => return .i64_xor,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
 
         .shl => switch (args.valtype1.?) {
             .i32 => return .i32_shl,
             .i64 => return .i64_shl,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
        .shr => switch (args.valtype1.?) {
             .i32 => if (args.signedness.? == .signed) return .i32_shr_s else return .i32_shr_u,
             .i64 => if (args.signedness.? == .signed) return .i64_shr_s else return .i64_shr_u,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
         .rotl => switch (args.valtype1.?) {
             .i32 => return .i32_rotl,
             .i64 => return .i64_rotl,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
         .rotr => switch (args.valtype1.?) {
             .i32 => return .i32_rotr,
             .i64 => return .i64_rotr,
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
 
         .abs => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
             .f32 => return .f32_abs,
             .f64 => return .f64_abs,
+            .v128 => unreachable, // handled independently
         },
         .neg => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
             .f32 => return .f32_neg,
             .f64 => return .f64_neg,
+            .v128 => unreachable, // handled independently
         },
         .ceil => switch (args.valtype1.?) {
             .i64 => unreachable,
             .i32 => return .f32_ceil, // when valtype is f16, we store it in i32.
             .f32 => return .f32_ceil,
             .f64 => return .f64_ceil,
+            .v128 => unreachable, // handled independently
         },
         .floor => switch (args.valtype1.?) {
             .i64 => unreachable,
             .i32 => return .f32_floor, // when valtype is f16, we store it in i32.
             .f32 => return .f32_floor,
             .f64 => return .f64_floor,
+            .v128 => unreachable, // handled independently
         },
         .trunc => switch (args.valtype1.?) {
             .i32 => if (args.valtype2) |valty| switch (valty) {
@@ -482,40 +515,48 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
                 .i64 => unreachable,
                 .f32 => if (args.signedness.? == .signed) return .i32_trunc_f32_s else return .i32_trunc_f32_u,
                 .f64 => if (args.signedness.? == .signed) return .i32_trunc_f64_s else return .i32_trunc_f64_u,
+                .v128 => unreachable, // handled independently
             } else return .f32_trunc, // when no valtype2, it's an f16 instead which is stored in an i32.
             .i64 => switch (args.valtype2.?) {
                 .i32 => unreachable,
                 .i64 => unreachable,
                 .f32 => if (args.signedness.? == .signed) return .i64_trunc_f32_s else return .i64_trunc_f32_u,
                 .f64 => if (args.signedness.? == .signed) return .i64_trunc_f64_s else return .i64_trunc_f64_u,
+                .v128 => unreachable, // handled independently
             },
             .f32 => return .f32_trunc,
             .f64 => return .f64_trunc,
+            .v128 => unreachable, // handled independently
         },
         .nearest => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
             .f32 => return .f32_nearest,
             .f64 => return .f64_nearest,
+            .v128 => unreachable, // handled independently
         },
         .sqrt => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
             .f32 => return .f32_sqrt,
             .f64 => return .f64_sqrt,
+            .v128 => unreachable, // handled independently
         },
         .min => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
             .f32 => return .f32_min,
             .f64 => return .f64_min,
+            .v128 => unreachable, // handled independently
         },
         .max => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
             .f32 => return .f32_max,
             .f64 => return .f64_max,
+            .v128 => unreachable, // handled independently
         },
         .copysign => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
             .f32 => return .f32_copysign,
             .f64 => return .f64_copysign,
+            .v128 => unreachable, // handled independently
         },
 
         .wrap => switch (args.valtype1.?) {
@@ -523,8 +564,10 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
                 .i32 => unreachable,
                 .i64 => return .i32_wrap_i64,
                 .f32, .f64 => unreachable,
+                .v128 => unreachable, // handled independently
             },
             .i64, .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
         .convert => switch (args.valtype1.?) {
             .i32, .i64 => unreachable,
@@ -532,12 +575,15 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
                 .i32 => if (args.signedness.? == .signed) return .f32_convert_i32_s else return .f32_convert_i32_u,
                 .i64 => if (args.signedness.? == .signed) return .f32_convert_i64_s else return .f32_convert_i64_u,
                 .f32, .f64 => unreachable,
+                .v128 => unreachable, // handled independently
             },
             .f64 => switch (args.valtype2.?) {
                 .i32 => if (args.signedness.? == .signed) return .f64_convert_i32_s else return .f64_convert_i32_u,
                 .i64 => if (args.signedness.? == .signed) return .f64_convert_i64_s else return .f64_convert_i64_u,
                 .f32, .f64 => unreachable,
+                .v128 => unreachable, // handled independently
             },
+            .v128 => unreachable, // handled independently
         },
         .demote => if (args.valtype1.? == .f32 and args.valtype2.? == .f64) return .f32_demote_f64 else unreachable,
         .promote => if (args.valtype1.? == .f64 and args.valtype2.? == .f32) return .f64_promote_f32 else unreachable,
@@ -546,6 +592,7 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
             .i64 => if (args.valtype2.? == .f64) return .i64_reinterpret_f64 else unreachable,
             .f32 => if (args.valtype2.? == .i32) return .f32_reinterpret_i32 else unreachable,
             .f64 => if (args.valtype2.? == .i64) return .f64_reinterpret_i64 else unreachable,
+            .v128 => unreachable, // handled independently
         },
         .extend => switch (args.valtype1.?) {
             .i32 => switch (args.width.?) {
@@ -560,6 +607,7 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
                 else => unreachable,
             },
             .f32, .f64 => unreachable,
+            .v128 => unreachable, // handled independently
         },
     }
 }
@@ -629,6 +677,10 @@ err_msg: *Module.ErrorMsg,
 /// List of all locals' types generated throughout this declaration
 /// used to emit locals count at start of 'code' section.
 locals: std.ArrayListUnmanaged(u8),
+/// List of simd128 immediates. Each value is stored as an array of bytes.
+/// This list will only be populated for 128bit-simd values when the target features
+/// are enabled also.
+simd_immediates: std.ArrayListUnmanaged([16]u8) = .{},
 /// The Target we're emitting (used to call intInfo)
 target: std.Target,
 /// Represents the wasm binary file that is being linked.
@@ -665,14 +717,17 @@ stack_alignment: u32 = 16,
 /// It is illegal to store a non-i32 valtype in this list.
 free_locals_i32: std.ArrayListUnmanaged(u32) = .{},
 /// A list of indexes which represents a local of valtype `i64`.
-/// It is illegal to store a non-i32 valtype in this list.
+/// It is illegal to store a non-i64 valtype in this list.
 free_locals_i64: std.ArrayListUnmanaged(u32) = .{},
 /// A list of indexes which represents a local of valtype `f32`.
-/// It is illegal to store a non-i32 valtype in this list.
+/// It is illegal to store a non-f32 valtype in this list.
 free_locals_f32: std.ArrayListUnmanaged(u32) = .{},
 /// A list of indexes which represents a local of valtype `f64`.
-/// It is illegal to store a non-i32 valtype in this list.
+/// It is illegal to store a non-f64 valtype in this list.
 free_locals_f64: std.ArrayListUnmanaged(u32) = .{},
+/// A list of indexes which represents a local of valtype `v127`.
+/// It is illegal to store a non-v128 valtype in this list.
+free_locals_v128: std.ArrayListUnmanaged(u32) = .{},
 
 /// When in debug mode, this tracks if no `finishAir` was missed.
 /// Forgetting to call `finishAir` will cause the result to not be
@@ -699,12 +754,14 @@ pub fn deinit(func: *CodeGen) void {
     func.branches.deinit(func.gpa);
     func.blocks.deinit(func.gpa);
     func.locals.deinit(func.gpa);
+    func.simd_immediates.deinit(func.gpa);
     func.mir_instructions.deinit(func.gpa);
     func.mir_extra.deinit(func.gpa);
     func.free_locals_i32.deinit(func.gpa);
     func.free_locals_i64.deinit(func.gpa);
     func.free_locals_f32.deinit(func.gpa);
     func.free_locals_f64.deinit(func.gpa);
+    func.free_locals_v128.deinit(func.gpa);
     func.* = undefined;
 }
 
@@ -867,6 +924,17 @@ fn addImm64(func: *CodeGen, imm: u64) error{OutOfMemory}!void {
     try func.addInst(.{ .tag = .i64_const, .data = .{ .payload = extra_index } });
 }
 
+/// Accepts the index into the list of 128bit-immediates
+fn addImm128(func: *CodeGen, index: u32) error{OutOfMemory}!void {
+    const simd_values = func.simd_immediates.items[index];
+    const extra_index = @intCast(u32, func.mir_extra.items.len);
+    // tag + 128bit value
+    try func.mir_extra.ensureUnusedCapacity(func.gpa, 5);
+    func.mir_extra.appendAssumeCapacity(std.wasm.simdOpcode(.v128_const));
+    func.mir_extra.appendSliceAssumeCapacity(@alignCast(4, mem.bytesAsSlice(u32, &simd_values)));
+    try func.addInst(.{ .tag = .simd, .data = .{ .payload = extra_index } });
+}
+
 fn addFloat64(func: *CodeGen, float: f64) error{OutOfMemory}!void {
     const extra_index = try func.addExtra(Mir.Float64.fromFloat64(float));
     try func.addInst(.{ .tag = .f64_const, .data = .{ .payload = extra_index } });
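The five u32 words reserved by addImm128 are the v128_const opcode followed by the
16 immediate bytes viewed as four u32s. A sketch of that reinterpretation, under two
stated assumptions: the align(4) on the buffer stands in for the alignment the real
mir_extra storage provides, and the expected words assume a little-endian host
(wasm itself is little-endian):

    const std = @import("std");

    test "a 128-bit immediate occupies four u32 words" {
        const simd_values: [16]u8 align(4) = .{ 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 };
        const words = @alignCast(4, std.mem.bytesAsSlice(u32, &simd_values));
        // Together with the leading opcode word this is the
        // "tag + 128bit value" layout of 5 words reserved above.
        try std.testing.expectEqualSlices(u32, &.{ 1, 2, 3, 4 }, words);
    }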
@@ -924,6 +992,10 @@ fn typeToValtype(ty: Type, target: std.Target) wasm.Valtype {
         },
         else => wasm.Valtype.i32,
     },
+    .Vector => switch (determineSimdStoreStrategy(ty, target)) {
+        .direct => wasm.Valtype.v128,
+        .unrolled => wasm.Valtype.i32,
+    },
     else => wasm.Valtype.i32, // all represented as reference/immediate
     };
 }
@@ -950,6 +1022,7 @@ fn emitWValue(func: *CodeGen, value: WValue) InnerError!void {
         .local => |idx| try func.addLabel(.local_get, idx.value),
         .imm32 => |val| try func.addImm32(@bitCast(i32, val)),
         .imm64 => |val| try func.addImm64(val),
+        .imm128 => |val| try func.addImm128(val),
         .float32 => |val| try func.addInst(.{ .tag = .f32_const, .data = .{ .float32 = val } }),
         .float64 => |val| try func.addFloat64(val),
         .memory => |ptr| {
@@ -1016,6 +1089,10 @@ fn allocLocal(func: *CodeGen, ty: Type) InnerError!WValue {
             log.debug("reusing local ({d}) of type {}\n", .{ index, valtype });
             return WValue{ .local = .{ .value = index, .references = 1 } };
         },
+        .v128 => if (func.free_locals_v128.popOrNull()) |index| {
+            log.debug("reusing local ({d}) of type {}\n", .{ index, valtype });
+            return WValue{ .local = .{ .value = index, .references = 1 } };
+        },
     }
     log.debug("new local of type {}\n", .{valtype});
     // no local was free to be re-used, so allocate a new local instead
@@ -1098,7 +1175,6 @@ pub fn generate(
         .gpa = bin_file.allocator,
         .air = air,
         .liveness = liveness,
-        // .values = .{},
         .code = code,
         .decl_index = func.owner_decl,
         .decl = bin_file.options.module.?.declPtr(func.owner_decl),
@@ -1481,9 +1557,9 @@ fn memcpy(func: *CodeGen, dst: WValue, src: WValue, len: WValue) !void {
         .imm64 => |val| val,
         else => unreachable,
     };
-    // if the size (length) is more than 1024 bytes, we use a runtime loop instead to prevent
+    // if the size (length) is more than 32 bytes, we use a runtime loop instead to prevent
     // binary size bloat.
-    if (length > 1024) break :blk;
+    if (length > 32) break :blk;
     var offset: u32 = 0;
     const lhs_base = dst.offset();
     const rhs_base = src.offset();
@@ -1612,7 +1688,6 @@ fn isByRef(ty: Type, target: std.Target) bool {
     => return false,
 
     .Array,
-    .Vector,
     .Frame,
     .Union,
     => return ty.hasRuntimeBitsIgnoreComptime(),
@@ -1625,6 +1700,7 @@ fn isByRef(ty: Type, target: std.Target) bool {
         }
         return ty.hasRuntimeBitsIgnoreComptime();
     },
+    .Vector => return determineSimdStoreStrategy(ty, target) == .unrolled,
     .Int => return ty.intInfo(target).bits > 64,
     .Float => return ty.floatBits(target) > 64,
     .ErrorUnion => {
@@ -1647,6 +1723,26 @@ fn isByRef(ty: Type, target: std.Target) bool {
     }
 }
 
+const SimdStoreStrategy = enum {
+    direct,
+    unrolled,
+};
+
+/// For a given vector type, returns the `SimdStoreStrategy`.
+/// This means when a given type is 128 bits and either the simd128 or relaxed-simd
+/// features are enabled, the function will return `.direct`. This would allow to store
+/// it using a instruction, rather than an unrolled version.
+fn determineSimdStoreStrategy(ty: Type, target: std.Target) SimdStoreStrategy {
+    std.debug.assert(ty.zigTypeTag() == .Vector);
+    if (ty.bitSize(target) != 128) return .unrolled;
+    const hasFeature = std.Target.wasm.featureSetHas;
+    const features = target.cpu.features;
+    if (hasFeature(features, .relaxed_simd) or hasFeature(features, .simd128)) {
+        return .direct;
+    }
+    return .unrolled;
+}
+
 /// Creates a new local for a pointer that points to memory with given offset.
 /// This can be used to get a pointer to a struct field, error payload, etc.
 /// By providing `modify` as action, it will modify the given `ptr_value` instead of making a new
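A hedged sketch of the gating logic in determineSimdStoreStrategy, with the vector
Type parameter replaced by a plain bit size so it runs standalone (that substitution
is an assumption of this sketch; featureSet and featureSetHas are the real
std.Target.wasm helpers):

    const std = @import("std");

    const SimdStoreStrategy = enum { direct, unrolled };

    fn strategyFor(bit_size: u64, features: std.Target.Cpu.Feature.Set) SimdStoreStrategy {
        if (bit_size != 128) return .unrolled; // only whole v128 values go direct
        const hasFeature = std.Target.wasm.featureSetHas;
        if (hasFeature(features, .relaxed_simd) or hasFeature(features, .simd128)) {
            return .direct;
        }
        return .unrolled;
    }

    test "only 128-bit vectors on simd-enabled targets store directly" {
        const simd = std.Target.wasm.featureSet(&.{.simd128});
        const none = std.Target.wasm.featureSet(&.{});
        try std.testing.expectEqual(SimdStoreStrategy.direct, strategyFor(128, simd));
        try std.testing.expectEqual(SimdStoreStrategy.unrolled, strategyFor(128, none));
        try std.testing.expectEqual(SimdStoreStrategy.unrolled, strategyFor(256, simd));
    }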
@@ -2187,10 +2283,29 @@ fn store(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerError!void {
             const len = @intCast(u32, ty.abiSize(func.target));
             return func.memcpy(lhs, rhs, .{ .imm32 = len });
         },
-        .Struct, .Array, .Union, .Vector => if (isByRef(ty, func.target)) {
+        .Struct, .Array, .Union => if (isByRef(ty, func.target)) {
             const len = @intCast(u32, ty.abiSize(func.target));
             return func.memcpy(lhs, rhs, .{ .imm32 = len });
         },
+        .Vector => switch (determineSimdStoreStrategy(ty, func.target)) {
+            .unrolled => {
+                const len = @intCast(u32, ty.abiSize(func.target));
+                return func.memcpy(lhs, rhs, .{ .imm32 = len });
+            },
+            .direct => {
+                try func.emitWValue(lhs);
+                try func.lowerToStack(rhs);
+                // TODO: Add helper functions for simd opcodes
+                const extra_index = @intCast(u32, func.mir_extra.items.len);
+                // stores as := opcode, offset, alignment (opcode::memarg)
+                try func.mir_extra.appendSlice(func.gpa, &[_]u32{
+                    std.wasm.simdOpcode(.v128_store),
+                    offset + lhs.offset(),
+                    ty.abiAlignment(func.target),
+                });
+                return func.addInst(.{ .tag = .simd, .data = .{ .payload = extra_index } });
+            },
+        },
         .Pointer => {
             if (ty.isSlice()) {
                 // store pointer first
@@ -2289,6 +2404,19 @@ fn load(func: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValue {
     // load local's value from memory by its stack position
     try func.emitWValue(operand);
 
+    if (ty.zigTypeTag() == .Vector) {
+        // TODO: Add helper functions for simd opcodes
+        const extra_index = @intCast(u32, func.mir_extra.items.len);
+        // stores as := opcode, offset, alignment (opcode::memarg)
+        try func.mir_extra.appendSlice(func.gpa, &[_]u32{
+            std.wasm.simdOpcode(.v128_load),
+            offset + operand.offset(),
+            ty.abiAlignment(func.target),
+        });
+        try func.addInst(.{ .tag = .simd, .data = .{ .payload = extra_index } });
+        return WValue{ .stack = {} };
+    }
+
     const abi_size = @intCast(u8, ty.abiSize(func.target));
     const opcode = buildOpcode(.{
         .valtype1 = typeToValtype(ty, func.target),
@@ -2766,10 +2894,24 @@ fn lowerConstant(func: *CodeGen, arg_val: Value, ty: Type) InnerError!WValue {
             const int_val = Value.initPayload(&payload.base);
             return func.lowerConstant(int_val, struct_obj.backing_int_ty);
         },
+        .Vector => {
+            assert(determineSimdStoreStrategy(ty, target) == .direct);
+            var buf: [16]u8 = undefined;
+            val.writeToMemory(ty, func.bin_file.base.options.module.?, &buf);
+            return func.storeSimdImmd(buf);
+        },
         else => |zig_type| return func.fail("Wasm TODO: LowerConstant for zigTypeTag {}", .{zig_type}),
     }
 }
 
+/// Stores the value as a 128bit-immediate value by storing it inside
+/// the list and returning the index into this list as `WValue`.
+fn storeSimdImmd(func: *CodeGen, value: [16]u8) !WValue {
+    const index = @intCast(u32, func.simd_immediates.items.len);
+    try func.simd_immediates.append(func.gpa, value);
+    return WValue{ .imm128 = index };
+}
+
 fn emitUndefined(func: *CodeGen, ty: Type) InnerError!WValue {
     switch (ty.zigTypeTag()) {
         .Bool, .ErrorSet => return WValue{ .imm32 = 0xaaaaaaaa },
@@ -4288,9 +4430,71 @@ fn airIntToFloat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 fn airSplat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const ty_op = func.air.instructions.items(.data)[inst].ty_op;
     const operand = try func.resolveInst(ty_op.operand);
+    const ty = func.air.typeOfIndex(inst);
+    const elem_ty = ty.childType();
 
-    _ = operand;
-    return func.fail("TODO: Implement wasm airSplat", .{});
+    if (determineSimdStoreStrategy(ty, func.target) == .direct) blk: {
+        switch (operand) {
+            // when the operand lives in the linear memory section, we can directly
+            // load and splat the value at once. Meaning we do not first have to load
+            // the scalar value onto the stack.
+            .stack_offset, .memory, .memory_offset => {
+                const opcode = switch (elem_ty.bitSize(func.target)) {
+                    8 => std.wasm.simdOpcode(.v128_load8_splat),
+                    16 => std.wasm.simdOpcode(.v128_load16_splat),
+                    32 => std.wasm.simdOpcode(.v128_load32_splat),
+                    64 => std.wasm.simdOpcode(.v128_load64_splat),
+                    else => break :blk, // Cannot make use of simd-instructions
+                };
+                const result = try func.allocLocal(ty);
+                try func.emitWValue(operand);
+                // TODO: Add helper functions for simd opcodes
+                const extra_index = @intCast(u32, func.mir_extra.items.len);
+                // stores as := opcode, offset, alignment (opcode::memarg)
+                try func.mir_extra.appendSlice(func.gpa, &[_]u32{
+                    opcode,
+                    operand.offset(),
+                    elem_ty.abiAlignment(func.target),
+                });
+                try func.addInst(.{ .tag = .simd, .data = .{ .payload = extra_index } });
+                try func.addLabel(.local_set, result.local.value);
+                return func.finishAir(inst, result, &.{ty_op.operand});
+            },
+            .local => {
+                const opcode = switch (elem_ty.bitSize(func.target)) {
+                    8 => std.wasm.simdOpcode(.i8x16_splat),
+                    16 => std.wasm.simdOpcode(.i16x8_splat),
+                    32 => if (elem_ty.isInt()) std.wasm.simdOpcode(.i32x4_splat) else std.wasm.simdOpcode(.f32x4_splat),
+                    64 => if (elem_ty.isInt()) std.wasm.simdOpcode(.i64x2_splat) else std.wasm.simdOpcode(.f64x2_splat),
+                    else => break :blk, // Cannot make use of simd-instructions
+                };
+                const result = try func.allocLocal(ty);
+                try func.emitWValue(operand);
+                const extra_index = @intCast(u32, func.mir_extra.items.len);
+                try func.mir_extra.append(func.gpa, opcode);
+                try func.addInst(.{ .tag = .simd, .data = .{ .payload = extra_index } });
+                try func.addLabel(.local_set, result.local.value);
+                return func.finishAir(inst, result, &.{ty_op.operand});
+            },
+            else => unreachable,
+        }
+    }
+    const elem_size = elem_ty.bitSize(func.target);
+    const vector_len = @intCast(usize, ty.vectorLen());
+    if ((!std.math.isPowerOfTwo(elem_size) or elem_size % 8 != 0) and vector_len > 1) {
+        return func.fail("TODO: WebAssembly `@splat` for arbitrary element bitsize {d}", .{elem_size});
+    }
+
+    const result = try func.allocStack(ty);
+    const elem_byte_size = @intCast(u32, elem_ty.abiSize(func.target));
+    var index: usize = 0;
+    var offset: u32 = 0;
+    while (index < vector_len) : (index += 1) {
+        try func.store(result, operand, elem_ty, offset);
+        offset += elem_byte_size;
+    }
+
+    return func.finishAir(inst, result, &.{ty_op.operand});
 }
 
 fn airSelect(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
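For context, the kind of user code this lowering handles (the @splat builtin took the
vector length as its first argument in this era of Zig). Which opcode it compiles to
is not asserted here: per the branches above, a scalar already in linear memory would
typically fuse into v128_load32_splat, while one held in a local would use i32x4_splat.

    const std = @import("std");

    test "@splat of a runtime u32 into a 4-lane vector" {
        var x: u32 = 42;
        const v: @Vector(4, u32) = @splat(4, x);
        try std.testing.expectEqual(@as(u32, 42), v[0]);
        try std.testing.expectEqual(@as(u32, 42), v[3]);
    }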
src/arch/wasm/Emit.zig
@@ -240,6 +240,7 @@ pub fn emitMir(emit: *Emit) InnerError!void {
             .i64_ctz => try emit.emitTag(tag),
 
             .extended => try emit.emitExtended(inst),
+            .simd => try emit.emitSimd(inst),
         }
     }
 }
@@ -341,11 +342,14 @@ fn emitMemArg(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) !void {
     const extra_index = emit.mir.instructions.items(.data)[inst].payload;
     const mem_arg = emit.mir.extraData(Mir.MemArg, extra_index).data;
     try emit.code.append(@enumToInt(tag));
+    try encodeMemArg(mem_arg, emit.code.writer());
+}
+
+fn encodeMemArg(mem_arg: Mir.MemArg, writer: anytype) !void {
     // wasm encodes alignment as power of 2, rather than natural alignment
     const encoded_alignment = @ctz(mem_arg.alignment);
-    try leb128.writeULEB128(emit.code.writer(), encoded_alignment);
-    try leb128.writeULEB128(emit.code.writer(), mem_arg.offset);
+    try leb128.writeULEB128(writer, encoded_alignment);
+    try leb128.writeULEB128(writer, mem_arg.offset);
 }
 
 fn emitCall(emit: *Emit, inst: Mir.Inst.Index) !void {
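Note the alignment written by encodeMemArg is the power-of-two exponent, not the byte
count, which is what the @ctz computes: a natural alignment of 16 encodes as 4. A
quick standalone check of that trick:

    const std = @import("std");

    test "memarg alignment is stored as a power-of-two exponent" {
        const natural: u32 = 16;
        // 16 == 1 << 4, so the encoded alignment field is 4.
        try std.testing.expectEqual(@as(u6, 4), @ctz(natural));
    }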
@@ -426,6 +430,38 @@ fn emitExtended(emit: *Emit, inst: Mir.Inst.Index) !void {
     }
 }
 
+fn emitSimd(emit: *Emit, inst: Mir.Inst.Index) !void {
+    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
+    const opcode = emit.mir.extra[extra_index];
+    const writer = emit.code.writer();
+    try emit.code.append(0xFD);
+    try leb128.writeULEB128(writer, opcode);
+    switch (@intToEnum(std.wasm.SimdOpcode, opcode)) {
+        .v128_store,
+        .v128_load,
+        .v128_load8_splat,
+        .v128_load16_splat,
+        .v128_load32_splat,
+        .v128_load64_splat,
+        => {
+            const mem_arg = emit.mir.extraData(Mir.MemArg, extra_index + 1).data;
+            try encodeMemArg(mem_arg, writer);
+        },
+        .v128_const => {
+            const simd_value = emit.mir.extra[extra_index + 1 ..][0..4];
+            try writer.writeAll(std.mem.asBytes(simd_value));
+        },
+        .i8x16_splat,
+        .i16x8_splat,
+        .i32x4_splat,
+        .i64x2_splat,
+        .f32x4_splat,
+        .f64x2_splat,
+        => {}, // opcode already written
+        else => |tag| return emit.fail("TODO: Implement simd instruction: {s}\n", .{@tagName(tag)}),
+    }
+}
+
 fn emitMemFill(emit: *Emit) !void {
     try emit.code.append(0xFC);
     try emit.code.append(0x0B);
 
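Putting emitSimd and encodeMemArg together: a v128_load at offset 8 from a 16-byte
aligned address serializes to exactly four bytes. The sketch below writes the same
bytes by hand, outside the Mir machinery, to show the wire format:

    const std = @import("std");

    test "wire format of v128_load with offset 8, align 16" {
        var buf = std.ArrayList(u8).init(std.testing.allocator);
        defer buf.deinit();
        const writer = buf.writer();
        try writer.writeByte(0xFD); // SIMD prefix
        try std.leb.writeULEB128(writer, std.wasm.simdOpcode(.v128_load)); // 0x00
        try std.leb.writeULEB128(writer, @as(u32, @ctz(@as(u32, 16)))); // alignment exponent
        try std.leb.writeULEB128(writer, @as(u32, 8)); // offset
        try std.testing.expectEqualSlices(u8, &.{ 0xFD, 0x00, 0x04, 0x08 }, buf.items);
    }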
src/arch/wasm/Mir.zig
@@ -518,6 +518,12 @@ pub const Inst = struct {
         ///
         /// The `data` field depends on the extension instruction
         extended = 0xFC,
+        /// The instruction consists of a simd opcode.
+        /// The actual simd-opcode is found at payload's index.
+        ///
+        /// The `data` field depends on the simd instruction and
+        /// may contain additional data.
+        simd = 0xFD,
         /// Contains a symbol to a function pointer
         /// uses `label`
         ///
@@ -578,7 +584,7 @@ pub fn deinit(self: *Mir, gpa: std.mem.Allocator) void {
     self.* = undefined;
 }
 
-pub fn extraData(self: Mir, comptime T: type, index: usize) struct { data: T, end: usize } {
+pub fn extraData(self: *const Mir, comptime T: type, index: usize) struct { data: T, end: usize } {
     const fields = std.meta.fields(T);
     var i: usize = index;
     var result: T = undefined;
 
src/codegen.zig
@@ -808,6 +808,54 @@ pub fn generateSymbol(
             }
             return Result{ .appended = {} };
         },
+        .Vector => switch (typed_value.val.tag()) {
+            .bytes => {
+                const bytes = typed_value.val.castTag(.bytes).?.data;
+                const len = @intCast(usize, typed_value.ty.arrayLen());
+                try code.ensureUnusedCapacity(len);
+                code.appendSliceAssumeCapacity(bytes[0..len]);
+                return Result{ .appended = {} };
+            },
+            .aggregate => {
+                const elem_vals = typed_value.val.castTag(.aggregate).?.data;
+                const elem_ty = typed_value.ty.elemType();
+                const len = @intCast(usize, typed_value.ty.arrayLen());
+                for (elem_vals[0..len]) |elem_val| {
+                    switch (try generateSymbol(bin_file, src_loc, .{
+                        .ty = elem_ty,
+                        .val = elem_val,
+                    }, code, debug_output, reloc_info)) {
+                        .appended => {},
+                        .externally_managed => |slice| {
+                            code.appendSliceAssumeCapacity(slice);
+                        },
+                        .fail => |em| return Result{ .fail = em },
+                    }
+                }
+                return Result{ .appended = {} };
+            },
+            .repeated => {
+                const array = typed_value.val.castTag(.repeated).?.data;
+                const elem_ty = typed_value.ty.childType();
+                const len = typed_value.ty.arrayLen();
+
+                var index: u64 = 0;
+                while (index < len) : (index += 1) {
+                    switch (try generateSymbol(bin_file, src_loc, .{
+                        .ty = elem_ty,
+                        .val = array,
+                    }, code, debug_output, reloc_info)) {
+                        .appended => {},
+                        .externally_managed => |slice| {
+                            code.appendSliceAssumeCapacity(slice);
+                        },
+                        .fail => |em| return Result{ .fail = em },
+                    }
+                }
+                return Result{ .appended = {} };
+            },
+            else => unreachable,
+        },
         else => |t| {
             return Result{
                 .fail = try ErrorMsg.create(
test/behavior/vector.zig
@@ -138,7 +138,6 @@ test "vector bit operators" {
 }
 
 test "implicit cast vector to array" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
@@ -157,7 +156,6 @@
 }
 
 test "array to vector" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
@@ -235,7 +233,6 @@ test "vector casts of sizes not divisible by 8" {
 }
 
 test "vector @splat" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
@@ -284,7 +281,6 @@
 }
 
 test "load vector elements via comptime index" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
@@ -307,7 +303,6 @@
 }
 
 test "store vector elements via comptime index" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
@@ -336,7 +331,6 @@
 }
 
 test "load vector elements via runtime index" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
@@ -359,7 +353,6 @@
 }
 
 test "store vector elements via runtime index" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
@@ -383,7 +376,6 @@
 }
 
 test "initialize vector which is a struct field" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO