diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig
index 39eaa77ad5..2c6f2b33b7 100644
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -1164,7 +1164,22 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement slice_len for {}", .{self.target.cpu.arch});
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: {
+        const mcv = try self.resolveInst(ty_op.operand);
+        switch (mcv) {
+            .dead, .unreach => unreachable,
+            .register => unreachable, // a slice doesn't fit in one register
+            .stack_offset => |off| {
+                // Stack offsets are subtracted from fp, so the second word of
+                // the slice (len) lives at off; off + 8 is the first word (ptr).
+                break :result MCValue{ .stack_offset = off };
+            },
+            .memory => |addr| {
+                break :result MCValue{ .memory = addr + 8 };
+            },
+            else => return self.fail("TODO implement slice_len for {}", .{mcv}),
+        }
+    };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
@@ -1183,10 +1198,117 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
 fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
     const is_volatile = false; // TODO
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
-    const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement slice_elem_val for {}", .{self.target.cpu.arch});
+
+    if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
+    const result: MCValue = result: {
+        const slice_mcv = try self.resolveInst(bin_op.lhs);
+
+        // TODO optimize for the case where the index is a constant,
+        // i.e. index_mcv == .immediate
+        const index_mcv = try self.resolveInst(bin_op.rhs);
+        const index_is_register = index_mcv == .register;
+
+        const slice_ty = self.air.typeOf(bin_op.lhs);
+        const elem_ty = slice_ty.childType();
+        const elem_size = elem_ty.abiSize(self.target.*);
+
+        var buf: Type.SlicePtrFieldTypeBuffer = undefined;
+        const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
+
+        if (index_is_register) self.register_manager.freezeRegs(&.{index_mcv.register});
+        defer if (index_is_register) self.register_manager.unfreezeRegs(&.{index_mcv.register});
+
+        const base_mcv: MCValue = switch (slice_mcv) {
+            .stack_offset => |off| .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, .{ .stack_offset = off + 8 }) },
+            else => return self.fail("TODO slice_elem_val when slice is {}", .{slice_mcv}),
+        };
+        self.register_manager.freezeRegs(&.{base_mcv.register});
+
+        // TODO implement optimized ldr for airSliceElemVal
+        const dst_mcv = try self.allocRegOrMem(inst, true);
+
+        const offset_mcv = try self.genMulConstant(bin_op.rhs, @intCast(u32, elem_size));
+        assert(offset_mcv == .register); // result of multiplication should always be register
+        self.register_manager.freezeRegs(&.{offset_mcv.register});
+
+        const addr_reg = try self.register_manager.allocReg(null);
+        self.register_manager.freezeRegs(&.{addr_reg});
+        defer self.register_manager.unfreezeRegs(&.{addr_reg});
+
+        _ = try self.addInst(.{
+            .tag = .add_shifted_register,
+            .data = .{ .rrr_imm6_shift = .{
+                .rd = addr_reg,
+                .rn = base_mcv.register,
+                .rm = offset_mcv.register,
+                .imm6 = 0,
+                .shift = .lsl,
+            } },
+        });
+
+        // At this point in time, neither the base register
+        // nor the offset register contains any valuable data
+        // anymore.
+        self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
+
+        try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
+
+        break :result dst_mcv;
+    };
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
+/// Emits a `mul` of the value of `op` by the constant `imm` and returns
+/// the newly allocated register holding the product. `op` is first moved
+/// into a register if it is not already in one.
+fn genMulConstant(self: *Self, op: Air.Inst.Ref, imm: u32) !MCValue {
+    const lhs = try self.resolveInst(op);
+    const rhs = MCValue{ .immediate = imm };
+
+    const lhs_is_register = lhs == .register;
+
+    if (lhs_is_register) self.register_manager.freezeRegs(&.{lhs.register});
+    defer if (lhs_is_register) self.register_manager.unfreezeRegs(&.{lhs.register});
+
+    // Destination must be a register
+    // LHS must be a register
+    // RHS must be a register
+    var dst_mcv: MCValue = undefined;
+    var lhs_mcv: MCValue = lhs;
+    var rhs_mcv: MCValue = rhs;
+
+    // Allocate registers for operands and/or destination
+    // Allocate 1 or 2 registers
+    if (lhs_is_register) {
+        // Move RHS to register
+        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
+        rhs_mcv = dst_mcv;
+    } else {
+        // Move LHS and RHS to register
+        const regs = try self.register_manager.allocRegs(2, .{ null, null });
+        lhs_mcv = MCValue{ .register = regs[0] };
+        rhs_mcv = MCValue{ .register = regs[1] };
+        dst_mcv = lhs_mcv;
+    }
+
+    // Move the operands to the newly allocated registers
+    if (!lhs_is_register) {
+        try self.genSetReg(self.air.typeOf(op), lhs_mcv.register, lhs);
+    }
+    try self.genSetReg(Type.initTag(.usize), rhs_mcv.register, rhs);
+
+    _ = try self.addInst(.{
+        .tag = .mul,
+        .data = .{ .rrr = .{
+            .rd = dst_mcv.register,
+            .rn = lhs_mcv.register,
+            .rm = rhs_mcv.register,
+        } },
+    });
+
+    return dst_mcv;
+}
+
 fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void {
     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
@@ -1310,6 +1432,16 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
                 .undef => unreachable,
                 .compare_flags_signed, .compare_flags_unsigned => unreachable,
                 .embedded_in_code => unreachable,
+                .register => |dst_reg| {
+                    _ = try self.addInst(.{
+                        .tag = .ldr_immediate,
+                        .data = .{ .load_store_register_immediate = .{
+                            .rt = dst_reg,
+                            .rn = addr_reg,
+                            .offset = Instruction.LoadStoreOffset.none.immediate,
+                        } },
+                    });
+                },
                 .stack_offset => |off| {
                     if (elem_ty.abiSize(self.target.*) <= 8) {
                         const tmp_reg = try self.register_manager.allocReg(null);
@@ -2590,8 +2722,61 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
             if (stack_offset == off)
                 return; // Copy stack variable to itself; nothing to do.
 
-            const reg = try self.copyToTmpRegister(ty, mcv);
-            return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
+            const ptr_bits = self.target.cpu.arch.ptrBitWidth();
+            const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+            if (ty.abiSize(self.target.*) <= ptr_bytes) {
+                const reg = try self.copyToTmpRegister(ty, mcv);
+                return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
+            } else {
+                // TODO optimize the register allocation
+                const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null });
+                self.register_manager.freezeRegs(&regs);
+                defer self.register_manager.unfreezeRegs(&regs);
+
+                const src_reg = regs[0];
+                const dst_reg = regs[1];
+                const len_reg = regs[2];
+                const count_reg = regs[3];
+                const tmp_reg = regs[4];
+
+                // sub src_reg, fp, #off
+                const adj_src_offset = off + @intCast(u32, ty.abiSize(self.target.*));
+                const src_offset = math.cast(u12, adj_src_offset) catch return self.fail("TODO genSetStack: larger stack offsets", .{});
+                _ = try self.addInst(.{
+                    .tag = .sub_immediate,
+                    .data = .{ .rr_imm12_sh = .{
+                        .rd = src_reg,
+                        .rn = .x29,
+                        .imm12 = src_offset,
+                    } },
+                });
+
+                // sub dst_reg, fp, #stack_offset
+                const adj_dst_off = stack_offset + @intCast(u32, ty.abiSize(self.target.*));
+                const dst_offset = math.cast(u12, adj_dst_off) catch return self.fail("TODO genSetStack: larger stack offsets", .{});
+                _ = try self.addInst(.{
+                    .tag = .sub_immediate,
+                    .data = .{ .rr_imm12_sh = .{
+                        .rd = dst_reg,
+                        .rn = .x29,
+                        .imm12 = dst_offset,
+                    } },
+                });
+
+                // mov len, #elem_size (movz, not movk: len_reg is freshly allocated and must be fully initialized)
+                const elem_size = @intCast(u32, ty.abiSize(self.target.*));
+                const len_imm = math.cast(u16, elem_size) catch return self.fail("TODO genSetStack: larger copy sizes", .{});
+                _ = try self.addInst(.{
+                    .tag = .movz,
+                    .data = .{ .r_imm16_sh = .{
+                        .rd = len_reg,
+                        .imm16 = len_imm,
+                    } },
+                });
+
+                // memcpy(src, dst, len)
+                try self.genInlineMemcpy(src_reg, dst_reg, len_reg, count_reg, tmp_reg);
+            }
         },
     }
 }
@@ -2711,7 +2896,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                         } },
                     });
                 },
-                else => return self.fail("TODO implement genSetReg other types abi_size={}", .{abi_size}),
+                3 => return self.fail("TODO implement genSetReg types size 3", .{}),
+                else => unreachable,
             }
         },
         else => return self.fail("TODO implement genSetReg for aarch64 {}", .{mcv}),
diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig
index 5b2610f508..140d0664b5 100644
--- a/src/arch/aarch64/Emit.zig
+++ b/src/arch/aarch64/Emit.zig
@@ -91,6 +91,7 @@ pub fn emitMir(
 
             .call_extern => try emit.mirCallExtern(inst),
 
+            .add_shifted_register => try emit.mirAddSubtractShiftedRegister(inst),
             .cmp_shifted_register => try emit.mirAddSubtractShiftedRegister(inst),
 
             .cset => try emit.mirConditionalSelect(inst),
@@ -132,6 +133,8 @@ pub fn emitMir(
             .movk => try emit.mirMoveWideImmediate(inst),
             .movz => try emit.mirMoveWideImmediate(inst),
 
+            .mul => try emit.mirDataProcessing3Source(inst),
+
             .nop => try emit.mirNop(),
 
             .push_regs => try emit.mirPushPopRegs(inst),
@@ -201,6 +204,13 @@ fn instructionSize(emit: *Emit, inst: Mir.Inst.Index) usize {
                 return 5 * 4;
             }
         },
+        .pop_regs, .push_regs => {
+            const reg_list = emit.mir.instructions.items(.data)[inst].reg_list;
+            const number_of_regs = @popCount(u32, reg_list);
+            const number_of_insts = std.math.divCeil(u6, number_of_regs, 2) catch unreachable;
+            // One 4-byte instruction per pair of registers; widen first so the product cannot overflow u6
+            return @as(usize, number_of_insts) * 4;
+        },
         .call_extern => return 4,
         .dbg_line,
         .dbg_epilogue_begin,
@@ -565,15 +575,15 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
 fn mirAddSubtractShiftedRegister(emit: *Emit, inst: Mir.Inst.Index) !void {
     const tag = emit.mir.instructions.items(.tag)[inst];
     const rrr_imm6_shift = emit.mir.instructions.items(.data)[inst].rrr_imm6_shift;
+    const rd = rrr_imm6_shift.rd;
+    const rn = rrr_imm6_shift.rn;
+    const rm = rrr_imm6_shift.rm;
+    const shift = rrr_imm6_shift.shift;
+    const imm6 = rrr_imm6_shift.imm6;
 
     switch (tag) {
-        .cmp_shifted_register => try emit.writeInstruction(Instruction.subsShiftedRegister(
-            rrr_imm6_shift.rd,
-            rrr_imm6_shift.rn,
-            rrr_imm6_shift.rm,
-            rrr_imm6_shift.shift,
-            rrr_imm6_shift.imm6,
-        )),
+        .cmp_shifted_register => try emit.writeInstruction(Instruction.subsShiftedRegister(rd, rn, rm, shift, imm6)),
+        .add_shifted_register => try emit.writeInstruction(Instruction.addShiftedRegister(rd, rn, rm, shift, imm6)),
         else => unreachable,
     }
 }
@@ -802,6 +812,17 @@ fn mirMoveWideImmediate(emit: *Emit, inst: Mir.Inst.Index) !void {
     }
 }
 
+/// Emits the machine instruction for a MIR instruction with an `rrr` (three register) payload; only `mul` is handled.
+fn mirDataProcessing3Source(emit: *Emit, inst: Mir.Inst.Index) !void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    const rrr = emit.mir.instructions.items(.data)[inst].rrr;
+
+    switch (tag) {
+        .mul => try emit.writeInstruction(Instruction.mul(rrr.rd, rrr.rn, rrr.rm)),
+        else => unreachable,
+    }
+}
+
 fn mirNop(emit: *Emit) !void {
     try emit.writeInstruction(Instruction.nop());
 }
diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig
index cd370c66ed..4f653ff072 100644
--- a/src/arch/aarch64/Mir.zig
+++ b/src/arch/aarch64/Mir.zig
@@ -26,6 +26,8 @@ pub const Inst = struct {
     pub const Tag = enum(u16) {
         /// Add (immediate)
         add_immediate,
+        /// Add (shifted register)
+        add_shifted_register,
         /// Branch conditionally
         b_cond,
         /// Branch
@@ -82,6 +84,8 @@ pub const Inst = struct {
         movk,
         /// Move wide with zero
         movz,
+        /// Multiply
+        mul,
         /// No Operation
         nop,
         /// Pseudo-instruction: Pop multiple registers
@@ -187,6 +191,14 @@ pub const Inst = struct {
             imm12: u12,
             sh: u1 = 0,
         },
+        /// Three registers
+        ///
+        /// Used by e.g. mul
+        rrr: struct {
+            rd: Register,
+            rn: Register,
+            rm: Register,
+        },
         /// Three registers and a shift (shift type and 6-bit amount)
         ///
         /// Used by e.g. cmp_shifted_register
@@ -208,7 +220,7 @@ pub const Inst = struct {
         },
         /// Two registers and a LoadStoreOffsetImmediate
         ///
-        /// Used by e.g. str_register
+        /// Used by e.g. str_immediate
         load_store_register_immediate: struct {
             rt: Register,
             rn: Register,