diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig
index 2a392f378b..a009cf6883 100644
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@@ -995,7 +995,6 @@ fn addInst(func: *Func, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
         .pseudo_dbg_prologue_end,
         .pseudo_dbg_line_column,
         .pseudo_dbg_epilogue_begin,
-        .pseudo_mv,
         .pseudo_dead,
         => false,
     }) wip_mir_log.debug("{}", .{func.fmtWipMir(result_index)});
@@ -2445,6 +2444,7 @@ fn genBinOp(
         .Vector => {
             const mir_tag: Mir.Inst.Tag = switch (tag) {
                 .add => .vaddvv,
+                .sub => .vsubvv,
                 else => return func.fail("TODO: genBinOp {s} Vector", .{@tagName(tag)}),
             };
 
@@ -2454,7 +2454,6 @@ fn genBinOp(
             const elem_size = lhs_ty.childType(zcu).bitSize(pt);
 
             try func.setVl(.zero, num_elem, .{
-                .vlmul = .mf2,
                 .vsew = switch (elem_size) {
                     8 => .@"8",
                     16 => .@"16",
@@ -2462,6 +2461,7 @@ fn genBinOp(
                     64 => .@"64",
                     else => unreachable,
                 },
+                .vlmul = .m1,
                 .vma = true,
                 .vta = true,
             });
@@ -2472,8 +2472,8 @@ fn genBinOp(
                 .data = .{
                     .r_type = .{
                         .rd = dst_reg,
-                        .rs1 = lhs_reg,
-                        .rs2 = rhs_reg,
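+                        // The rs2/rs1 slots hold vs2/vs1, and RVV computes "vs2 op vs1",
+                        // so lhs has to go in rs2 for vsub.vv to produce lhs - rhs.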
+                        .rs1 = rhs_reg,
+                        .rs2 = lhs_reg,
                     },
                 },
             });
@@ -3576,20 +3576,54 @@ fn airArrayElemVal(func: *Func, inst: Air.Inst.Index) !void {
             else => try func.genSetReg(Type.usize, addr_reg, array_mcv.address()),
         }
 
+        const dst_mcv = try func.allocRegOrMem(result_ty, inst, false);
+
+        if (array_ty.isVector(zcu)) {
+            // we need to load the vector, vslidedown to get the element we want,
+            // and store that element in a load frame.
+
+            const src_reg, const src_lock = try func.allocReg(.vector);
+            defer func.register_manager.unlockReg(src_lock);
+
+            // load the vector into a temporary register
+            try func.genCopy(array_ty, .{ .register = src_reg }, .{ .indirect = .{ .reg = addr_reg } });
+
+            // we need to construct a 1 x bitSize vector because of how lane splitting works in RISC-V
+            const single_ty = try pt.vectorType(.{ .child = elem_ty.toIntern(), .len = 1 });
+
+            // we can take a shortcut here when the index is 0: no vslidedown is
+            // needed and we can just copy to the frame index.
+            if (!(index_mcv == .immediate and index_mcv.immediate == 0)) {
+                const index_reg = try func.copyToTmpRegister(Type.usize, index_mcv);
+
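+                // vslidedown.vx computes vd[i] = vs2[i + rs1], so sliding down by
+                // `index` lanes leaves the wanted element in lane 0 (e.g. index 2
+                // of a @Vector(4, u32) lands in element 0 for the copy below).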
+                _ = try func.addInst(.{
+                    .tag = .vslidedownvx,
+                    .ops = .rrr,
+                    .data = .{ .r_type = .{
+                        .rd = src_reg,
+                        .rs1 = index_reg,
+                        .rs2 = src_reg,
+                    } },
+                });
+            }
+
+            try func.genCopy(single_ty, dst_mcv, .{ .register = src_reg });
+            break :result dst_mcv;
+        }
+
         const offset_reg = try func.elemOffset(index_ty, index_mcv, elem_abi_size);
         const offset_lock = func.register_manager.lockRegAssumeUnused(offset_reg);
         defer func.register_manager.unlockReg(offset_lock);
-
-        const dst_mcv = try func.allocRegOrMem(result_ty, inst, false);
         _ = try func.addInst(.{
             .tag = .add,
             .ops = .rrr,
             .data = .{ .r_type = .{
                 .rd = addr_reg,
-                .rs1 = offset_reg,
-                .rs2 = addr_reg,
+                .rs1 = addr_reg,
+                .rs2 = offset_reg,
             } },
         });
+
         try func.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
         break :result dst_mcv;
     };
@@ -5965,6 +5999,27 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
             if (src_reg.id() == reg.id()) return;
 
+            // there is no instruction for loading the contents of a vector register
+            // into an integer register, however we can cheat a bit by setting the
+            // element size to the total size of the vector; then vmv.x.s will work
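+            // (e.g. a @Vector(4, u8) spans 32 bits, so with vsew = 32 element 0
+            // covers the whole vector and a single vmv.x.s moves it out)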
+            if (src_reg.class() == .vector) {
+                try func.setVl(.zero, 0, .{
+                    .vsew = switch (ty.totalVectorBits(pt)) {
+                        8 => .@"8",
+                        16 => .@"16",
+                        32 => .@"32",
+                        64 => .@"64",
+                        else => |vec_bits| return func.fail("TODO: genSetReg vec -> {s} bits {d}", .{
+                            @tagName(reg.class()),
+                            vec_bits,
+                        }),
+                    },
+                    .vlmul = .m1,
+                    .vta = true,
+                    .vma = true,
+                });
+            }
+
             // mv reg, src_reg
             _ = try func.addInst(.{
                 .tag = .pseudo,
@@ -5978,57 +6033,28 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
         .register_pair => return func.fail("genSetReg should we allow reg -> reg_pair?", .{}),
         .load_frame => |frame| {
             if (reg.class() == .vector) {
-                if (abi_size > 8)
-                    return func.fail("TODO: genSetReg vectors > 8", .{});
-
-                const temp_reg = try func.register_manager.allocReg(null, abi.Registers.Integer.temporary);
-                const temp_lock = func.register_manager.lockRegAssumeUnused(temp_reg);
-                defer func.register_manager.unlockReg(temp_lock);
-
-                try func.setVl(.zero, 1, .{
-                    .vsew = switch (abi_size) {
-                        1 => .@"8",
-                        2 => .@"16",
-                        4 => .@"32",
-                        8 => .@"64",
-                        else => unreachable,
-                    },
-                    .vlmul = .m1,
-                    .vma = true,
-                    .vta = true,
-                });
-
-                try func.genCopy(ty, .{ .register = temp_reg }, .{ .load_frame = frame });
+                const addr_reg, const addr_lock = try func.allocReg(.int);
+                defer func.register_manager.unlockReg(addr_lock);
 
+                try func.genCopy(ty, .{ .register = addr_reg }, src_mcv.address());
+                try func.genCopy(ty, .{ .register = reg }, .{ .indirect = .{ .reg = addr_reg } });
+            } else {
                 _ = try func.addInst(.{
                     .tag = .pseudo,
-                    .ops = .pseudo_mv,
-                    .data = .{
-                        .rr = .{
-                            .rd = reg,
-                            .rs = temp_reg,
+                    .ops = .pseudo_load_rm,
+                    .data = .{ .rm = .{
+                        .r = reg,
+                        .m = .{
+                            .base = .{ .frame = frame.index },
+                            .mod = .{
+                                .size = func.memSize(ty),
+                                .unsigned = ty.isUnsignedInt(zcu),
+                                .disp = frame.off,
+                            },
                         },
-                    },
+                    } },
                 });
-
-                return;
             }
-
-            _ = try func.addInst(.{
-                .tag = .pseudo,
-                .ops = .pseudo_load_rm,
-                .data = .{ .rm = .{
-                    .r = reg,
-                    .m = .{
-                        .base = .{ .frame = frame.index },
-                        .mod = .{
-                            .size = func.memSize(ty),
-                            .unsigned = ty.isUnsignedInt(zcu),
-                            .disp = frame.off,
-                        },
-                    },
-                } },
-            });
         },
         .memory => |addr| {
             try func.genSetReg(ty, reg, .{ .immediate = addr });
@@ -6072,20 +6098,64 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
             });
         },
         .indirect => |reg_off| {
-            const float_class = dst_reg_class == .float;
+            const load_tag: Mir.Inst.Tag = switch (reg.class()) {
+                .float => switch (abi_size) {
+                    1 => unreachable, // Zig does not support 8-bit floats
+                    2 => return func.fail("TODO: genSetReg indirect 16-bit float", .{}),
+                    4 => .flw,
+                    8 => .fld,
+                    else => return std.debug.panic("TODO: genSetReg for float size {d}", .{abi_size}),
+                },
+                .int => switch (abi_size) {
+                    1 => .lb,
+                    2 => .lh,
+                    4 => .lw,
+                    8 => .ld,
+                    else => return std.debug.panic("TODO: genSetReg for int size {d}", .{abi_size}),
+                },
+                .vector => {
+                    assert(reg_off.off == 0);
 
-            const load_tag: Mir.Inst.Tag = switch (abi_size) {
-                1 => if (float_class)
-                    unreachable // Zig does not support 8-bit floats
-                else
-                    .lb,
-                2 => if (float_class)
-                    return func.fail("TODO: genSetReg indirect 16-bit float", .{})
-                else
-                    .lh,
-                4 => if (float_class) .flw else .lw,
-                8 => if (float_class) .fld else .ld,
-                else => return std.debug.panic("TODO: genSetReg for size {d}", .{abi_size}),
+                    // There is no vector instruction for loading with an offset from a
+                    // base register, so the base register must already hold the exact
+                    // address of the vector; we then load from it directly.
+                    const len: u5 = math.cast(u5, ty.vectorLen(zcu)) orelse {
+                        return func.fail("TODO: genSetReg indirect -> vec reg, vector length doesn't fit into imm avl", .{});
+                    };
+                    const elem_ty = ty.childType(zcu);
+                    const elem_size = elem_ty.abiSize(pt);
+
+                    try func.setVl(.zero, len, .{
+                        .vsew = switch (elem_size) {
+                            1 => .@"8",
+                            2 => .@"16",
+                            4 => .@"32",
+                            8 => .@"64",
+                            else => unreachable,
+                        },
+                        .vlmul = .m1,
+                        .vma = true,
+                        .vta = true,
+                    });
+
+                    _ = try func.addInst(.{
+                        .tag = .pseudo,
+                        .ops = .pseudo_load_rm,
+                        .data = .{ .rm = .{
+                            .r = reg,
+                            .m = .{
+                                .base = .{ .reg = reg_off.reg },
+                                .mod = .{
+                                    .size = func.memSize(elem_ty),
+                                    .unsigned = false,
+                                    .disp = 0,
+                                },
+                            },
+                        } },
+                    });
+
+                    return;
+                },
             };
 
             _ = try func.addInst(.{
@@ -6100,7 +6170,6 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
         },
         .lea_symbol => |sym_off| {
             assert(sym_off.off == 0);
-
             const atom_index = try func.symbolIndex();
 
             _ = try func.addInst(.{
@@ -6166,12 +6235,12 @@ fn genSetMem(
         => switch (abi_size) {
             0 => {},
             1, 2, 4, 8 => {
-                // no matter what type, it should use an integer register
-                const src_reg = try func.copyToTmpRegister(ty, src_mcv);
-                const src_lock = func.register_manager.lockRegAssumeUnused(src_reg);
+                const reg = try func.register_manager.allocReg(null, abi.Registers.Integer.temporary);
+                const src_lock = func.register_manager.lockRegAssumeUnused(reg);
                 defer func.register_manager.unlockReg(src_lock);
 
-                try func.genSetMem(base, disp, ty, .{ .register = src_reg });
+                try func.genSetReg(ty, reg, src_mcv);
+                try func.genSetMem(base, disp, ty, .{ .register = reg });
             },
             else => try func.genInlineMemcpy(
                 dst_ptr_mcv,
@@ -6180,6 +6249,46 @@ fn genSetMem(
         .register => |reg| {
+            if (reg.class() == .vector) {
+                const addr_reg = try func.copyToTmpRegister(Type.usize, dst_ptr_mcv);
+
+                const num_elem: u5 = math.cast(u5, ty.vectorLen(pt.zcu)) orelse {
+                    return func.fail("TODO: genSetMem use vsetvli for larger avl sizes", .{});
+                };
+                const elem_size = ty.childType(pt.zcu).bitSize(pt);
+
+                try func.setVl(.zero, num_elem, .{
+                    .vsew = switch (elem_size) {
+                        8 => .@"8",
+                        16 => .@"16",
+                        32 => .@"32",
+                        64 => .@"64",
+                        else => unreachable,
+                    },
+                    .vlmul = .m1,
+                    .vma = true,
+                    .vta = true,
+                });
+
+                _ = try func.addInst(.{
+                    .tag = .pseudo,
+                    .ops = .pseudo_store_rm,
+                    .data = .{ .rm = .{
+                        .r = reg,
+                        .m = .{
+                            .base = .{ .reg = addr_reg },
+                            .mod = .{
+                                .disp = 0,
+                                .size = func.memSize(ty.childType(pt.zcu)),
+                                .unsigned = false,
+                            },
+                        },
+                    } },
+                });
+
+                return;
+            }
+
             const mem_size = switch (base) {
                 .frame => |base_fi| mem_size: {
                     assert(disp >= 0);
diff --git a/src/arch/riscv64/Emit.zig b/src/arch/riscv64/Emit.zig
index 0c11a66a8d..bc972e86b9 100644
--- a/src/arch/riscv64/Emit.zig
+++ b/src/arch/riscv64/Emit.zig
@@ -75,7 +75,7 @@ pub fn emitMir(emit: *Emit) Error!void {
                     .r_info = (@as(u64, @intCast(symbol.sym_index)) << 32) | lo_r_type,
                     .r_addend = 0,
                 });
-            } else return emit.fail("TODO: load_symbol_reloc non-ELF", .{});
+            } else unreachable;
         },
         .call_extern_fn_reloc => |symbol| {
             if (emit.bin_file.cast(link.File.Elf)) |elf_file| {
diff --git a/src/arch/riscv64/Encoding.zig b/src/arch/riscv64/Encoding.zig
index 2d285419d1..93df55a5ed 100644
--- a/src/arch/riscv64/Encoding.zig
+++ b/src/arch/riscv64/Encoding.zig
@@ -116,8 +116,12 @@ const Enc = struct {
 };
 
 const VecWidth = enum(u3) {
+    // zig fmt: off
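+    // the RVV unit-stride load/store width encodings are not contiguous:
+    // 8-bit is 0b000, while 16/32/64-bit are 0b101, 0b110, and 0b111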
.@"8", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, + .vle16v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"16", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, + .vle32v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"32", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, + .vle64v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"64", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, // STORE_FP - .fsw => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b010 } } }, - .fsd => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b011 } } }, + .fsw => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b010 } } }, + .fsd => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b011 } } }, - .vse32v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"32", .umop = .unit, .vm = true, .mop = .unit, .mew = true, .nf = 0b000 } } }, - .vse64v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"64", .umop = .unit, .vm = true, .mop = .unit, .mew = true, .nf = 0b000 } } }, + .vse8v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"8", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, + .vse16v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"16", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, + .vse32v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"32", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, + .vse64v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"64", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } }, + + .vsoxei8v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"8", .umop = .unit, .vm = true, .mop = .ord, .mew = false, .nf = 0b000 } } }, // JALR @@ -516,11 +537,15 @@ pub const Mnemonic = enum { .amomaxud => .{ .opcode = .AMO, .data = .{ .amo = .{ .width = .D, .funct5 = 0b11100 } } }, // OP_V - .vsetivli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } }, - .vsetvli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } }, - .vaddvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000000, .funct3 = .OPIVV } } }, - .vadcxv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010000, .funct3 = .OPMVX } } }, - .vadcvx => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010000, .funct3 = .OPMVV } } }, + .vsetivli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } }, + .vsetvli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } }, + .vaddvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000000, .funct3 = .OPIVV } } }, + .vsubvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000010, .funct3 = .OPIVV } } }, + + .vadcvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010000, .funct3 = .OPMVV } } }, + .vmvvx => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010111, .funct3 = .OPIVX } } }, + + .vslidedownvx => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b001111, .funct3 = .OPIVX } } }, // zig fmt: on }; @@ -663,13 +688,23 @@ pub const InstEnc = enum { .fsgnjxs, .fsgnjxd, + .vle8v, + .vle16v, .vle32v, .vle64v, + + .vse8v, + .vse16v, .vse32v, .vse64v, + + .vsoxei8v, + .vaddvv, - .vadcxv, - .vadcvx, + .vsubvv, + .vadcvv, + .vmvvx, + .vslidedownvx, => 
-                    try lower.emit(.vadcvx, &.{
+                    try lower.emit(.vadcvv, &.{
                         .{ .reg = rr.rd },
-                        .{ .reg = rr.rs },
                         .{ .reg = .zero },
+                        .{ .reg = rr.rs },
                     });
                 },
                 .float, .vector => return lower.fail("TODO: lowerMir pseudo_mv vector -> {s}", .{@tagName(dst_class)}),
diff --git a/src/arch/riscv64/Mir.zig b/src/arch/riscv64/Mir.zig
index 24b83e6381..1d8a948f9a 100644
--- a/src/arch/riscv64/Mir.zig
+++ b/src/arch/riscv64/Mir.zig
@@ -142,6 +142,8 @@ pub const Inst = struct {
         vsetivli,
         vsetvl,
         vaddvv,
+        vsubvv,
+        vslidedownvx,
 
         // A Extension Instructions
         amo,