stage2 ARM: optimize airSliceElemVal for elem_size 1 or 4

In these cases, the AIR instruction can be lowered to a single ldr
(or ldrb) instruction.

Also fixes shifts in arm.bits.Offset
This commit is contained in:
joachimschmidt557 2022-02-03 20:31:01 +01:00 committed by Jakub Konka
parent 71321b6941
commit 04f379dd41
2 changed files with 139 additions and 105 deletions

View File

@ -1222,9 +1222,16 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
const is_volatile = false; // TODO
const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) .dead else result: {
if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
const result: MCValue = result: {
const slice_mcv = try self.resolveInst(bin_op.lhs);
// TODO optimize for the case where the index is a constant,
// i.e. index_mcv == .immediate
const index_mcv = try self.resolveInst(bin_op.rhs);
const index_is_register = index_mcv == .register;
const slice_ty = self.air.typeOf(bin_op.lhs);
const elem_ty = slice_ty.childType();
const elem_size = elem_ty.abiSize(self.target.*);
@ -1232,12 +1239,8 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
var buf: Type.SlicePtrFieldTypeBuffer = undefined;
const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
// TODO optimize this for the case when elem_size is a power
// of two (includes elem_size == 1)
const offset_mcv = try self.genArmMulConstant(inst, bin_op.rhs, 1, @intCast(u32, elem_size));
assert(offset_mcv == .register); // result of multiplication should always be register
self.register_manager.freezeRegs(&.{offset_mcv.register});
defer self.register_manager.unfreezeRegs(&.{offset_mcv.register});
if (index_is_register) self.register_manager.freezeRegs(&.{index_mcv.register});
defer if (index_is_register) self.register_manager.unfreezeRegs(&.{index_mcv.register});
const base_mcv: MCValue = switch (slice_mcv) {
.stack_offset => .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, slice_mcv) },
@ -1246,61 +1249,67 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
self.register_manager.freezeRegs(&.{base_mcv.register});
defer self.register_manager.unfreezeRegs(&.{base_mcv.register});
if (elem_size <= 4) {
const dst_reg = try self.register_manager.allocReg(inst);
self.register_manager.freezeRegs(&.{dst_reg});
defer self.register_manager.unfreezeRegs(&.{dst_reg});
switch (elem_size) {
1, 4 => {
const dst_reg = try self.register_manager.allocReg(inst);
const dst_mcv = MCValue{ .register = dst_reg };
self.register_manager.freezeRegs(&.{dst_reg});
defer self.register_manager.unfreezeRegs(&.{dst_reg});
switch (elem_size) {
1, 4 => {
const tag: Mir.Inst.Tag = switch (elem_size) {
1 => .ldrb,
4 => .ldr,
else => unreachable,
};
const index_reg: Register = switch (index_mcv) {
.register => |reg| reg,
else => try self.copyToTmpRegister(Type.usize, index_mcv),
};
self.register_manager.freezeRegs(&.{index_reg});
defer self.register_manager.unfreezeRegs(&.{index_reg});
_ = try self.addInst(.{
.tag = tag,
.data = .{ .rr_offset = .{
.rt = dst_reg,
.rn = base_mcv.register,
.offset = .{ .offset = Instruction.Offset.reg(offset_mcv.register, 0) },
} },
});
},
2 => {
_ = try self.addInst(.{
.tag = .ldrh,
.data = .{ .rr_extra_offset = .{
.rt = dst_reg,
.rn = base_mcv.register,
.offset = .{ .offset = Instruction.ExtraLoadStoreOffset.reg(offset_mcv.register) },
} },
});
},
else => unreachable,
}
const tag: Mir.Inst.Tag = switch (elem_size) {
1 => .ldrb,
4 => .ldr,
else => unreachable,
};
const shift: u5 = switch (elem_size) {
1 => 0,
4 => 2,
else => unreachable,
};
break :result MCValue{ .register = dst_reg };
} else {
const dst_mcv = try self.allocRegOrMem(inst, false);
_ = try self.addInst(.{
.tag = tag,
.data = .{ .rr_offset = .{
.rt = dst_reg,
.rn = base_mcv.register,
.offset = .{ .offset = Instruction.Offset.reg(index_reg, .{ .lsl = shift }) },
} },
});
const addr_reg = try self.register_manager.allocReg(null);
self.register_manager.freezeRegs(&.{addr_reg});
defer self.register_manager.unfreezeRegs(&.{addr_reg});
break :result dst_mcv;
},
else => {
const dst_mcv = try self.allocRegOrMem(inst, true);
try self.genArmBinOpCode(addr_reg, base_mcv, offset_mcv, false, .add, .unsigned);
const offset_mcv = try self.genArmMulConstant(bin_op.rhs, @intCast(u32, elem_size));
assert(offset_mcv == .register); // result of multiplication should always be register
self.register_manager.freezeRegs(&.{offset_mcv.register});
defer self.register_manager.unfreezeRegs(&.{offset_mcv.register});
// I know we will unfreeze these registers at the end of
// the scope of :result. However, at this point in time,
// neither the base register nor the offset register
// contains any valuable data anymore. In order to reduce
// register pressure, unfreeze them prematurely
self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
const addr_reg = try self.register_manager.allocReg(null);
self.register_manager.freezeRegs(&.{addr_reg});
defer self.register_manager.unfreezeRegs(&.{addr_reg});
try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
try self.genArmBinOpCode(addr_reg, base_mcv, offset_mcv, false, .add, .unsigned);
break :result dst_mcv;
// I know we will unfreeze these registers at the end of
// the scope of :result. However, at this point in time,
// neither the base register nor the offset register
// contains any valuable data anymore. In order to reduce
// register pressure, unfreeze them prematurely
self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
break :result dst_mcv;
},
}
};
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
@ -1931,8 +1940,8 @@ fn genArmBinOpCode(
.shl, .shr => {
assert(!swap_lhs_and_rhs);
const shift_amount = switch (operand) {
.Register => |reg_op| Instruction.ShiftAmount.reg(@intToEnum(Register, reg_op.rm)),
.Immediate => |imm_op| Instruction.ShiftAmount.imm(@intCast(u5, imm_op.imm)),
.register => |reg_op| Instruction.ShiftAmount.reg(@intToEnum(Register, reg_op.rm)),
.immediate => |imm_op| Instruction.ShiftAmount.imm(@intCast(u5, imm_op.imm)),
};
const tag: Mir.Inst.Tag = switch (op) {
@ -2036,12 +2045,11 @@ fn genArmMul(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Ai
return dst_mcv;
}
fn genArmMulConstant(self: *Self, inst: Air.Inst.Index, op: Air.Inst.Ref, op_index: Liveness.OperandInt, imm: u32) !MCValue {
fn genArmMulConstant(self: *Self, op: Air.Inst.Ref, imm: u32) !MCValue {
const lhs = try self.resolveInst(op);
const rhs = MCValue{ .immediate = imm };
const lhs_is_register = lhs == .register;
const reuse_lhs = lhs_is_register and self.reuseOperand(inst, op, op_index, lhs);
if (lhs_is_register) self.register_manager.freezeRegs(&.{lhs.register});
defer if (lhs_is_register) self.register_manager.unfreezeRegs(&.{lhs.register});
@ -2054,23 +2062,17 @@ fn genArmMulConstant(self: *Self, inst: Air.Inst.Index, op: Air.Inst.Ref, op_ind
var rhs_mcv: MCValue = rhs;
// Allocate registers for operands and/or destination
if (reuse_lhs) {
// Allocate 1 register
rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
dst_mcv = lhs;
// Allocate 1 or 2 registers
if (lhs_is_register) {
// Move RHS to register
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
rhs_mcv = dst_mcv;
} else {
// Allocate 1 or 2 registers
if (lhs_is_register) {
// Move RHS to register
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
rhs_mcv = dst_mcv;
} else {
// Move LHS and RHS to register
const regs = try self.register_manager.allocRegs(2, .{ null, null });
lhs_mcv = MCValue{ .register = regs[0] };
rhs_mcv = MCValue{ .register = regs[1] };
dst_mcv = lhs_mcv;
}
// Move LHS and RHS to register
const regs = try self.register_manager.allocRegs(2, .{ null, null });
lhs_mcv = MCValue{ .register = regs[0] };
rhs_mcv = MCValue{ .register = regs[1] };
dst_mcv = lhs_mcv;
}
// Move the operands to the newly allocated registers
@ -2132,7 +2134,7 @@ fn genArmInlineMemcpy(
.data = .{ .rr_offset = .{
.rt = tmp,
.rn = src,
.offset = .{ .offset = Instruction.Offset.reg(count, 0) },
.offset = .{ .offset = Instruction.Offset.reg(count, .none) },
} },
});
@ -2142,7 +2144,7 @@ fn genArmInlineMemcpy(
.data = .{ .rr_offset = .{
.rt = tmp,
.rn = dst,
.offset = .{ .offset = Instruction.Offset.reg(count, 0) },
.offset = .{ .offset = Instruction.Offset.reg(count, .none) },
} },
});
@ -3126,7 +3128,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
1, 4 => {
const offset = if (math.cast(u12, adj_off)) |imm| blk: {
break :blk Instruction.Offset.imm(imm);
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
const tag: Mir.Inst.Tag = switch (abi_size) {
1 => .strb,
@ -3450,7 +3452,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
1, 4 => {
const offset = if (adj_off <= math.maxInt(u12)) blk: {
break :blk Instruction.Offset.imm(@intCast(u12, adj_off));
} else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
} else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
const tag: Mir.Inst.Tag = switch (abi_size) {
1 => .ldrb,
@ -3536,7 +3538,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I
1, 4 => {
const offset = if (math.cast(u12, adj_off)) |imm| blk: {
break :blk Instruction.Offset.imm(imm);
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
const tag: Mir.Inst.Tag = switch (abi_size) {
1 => .strb,

View File

@ -343,11 +343,11 @@ pub const Instruction = union(enum) {
/// which can either be content from a register or an immediate
/// value
pub const Operand = union(enum) {
Register: packed struct {
register: packed struct {
rm: u4,
shift: u8,
},
Immediate: packed struct {
immediate: packed struct {
imm: u8,
rotate: u4,
},
@ -356,12 +356,12 @@ pub const Instruction = union(enum) {
/// register can be shifted by a specific immediate value or
/// by the contents of another register
pub const Shift = union(enum) {
Immediate: packed struct {
immediate: packed struct {
fixed: u1 = 0b0,
typ: u2,
amount: u5,
},
Register: packed struct {
register: packed struct {
fixed_1: u1 = 0b1,
typ: u2,
fixed_2: u1 = 0b0,
@ -376,7 +376,7 @@ pub const Instruction = union(enum) {
};
pub const none = Shift{
.Immediate = .{
.immediate = .{
.amount = 0,
.typ = 0,
},
@ -384,14 +384,14 @@ pub const Instruction = union(enum) {
pub fn toU8(self: Shift) u8 {
return switch (self) {
.Register => |v| @bitCast(u8, v),
.Immediate => |v| @bitCast(u8, v),
.register => |v| @bitCast(u8, v),
.immediate => |v| @bitCast(u8, v),
};
}
pub fn reg(rs: Register, typ: Type) Shift {
return Shift{
.Register = .{
.register = .{
.rs = rs.id(),
.typ = @enumToInt(typ),
},
@ -400,7 +400,7 @@ pub const Instruction = union(enum) {
pub fn imm(amount: u5, typ: Type) Shift {
return Shift{
.Immediate = .{
.immediate = .{
.amount = amount,
.typ = @enumToInt(typ),
},
@ -410,14 +410,14 @@ pub const Instruction = union(enum) {
pub fn toU12(self: Operand) u12 {
return switch (self) {
.Register => |v| @bitCast(u12, v),
.Immediate => |v| @bitCast(u12, v),
.register => |v| @bitCast(u12, v),
.immediate => |v| @bitCast(u12, v),
};
}
pub fn reg(rm: Register, shift: Shift) Operand {
return Operand{
.Register = .{
.register = .{
.rm = rm.id(),
.shift = shift.toU8(),
},
@ -426,7 +426,7 @@ pub const Instruction = union(enum) {
pub fn imm(immediate: u8, rotate: u4) Operand {
return Operand{
.Immediate = .{
.immediate = .{
.imm = immediate,
.rotate = rotate,
},
@ -447,7 +447,7 @@ pub const Instruction = union(enum) {
return for (masks) |mask, i| {
if (x & mask == x) {
break Operand{
.Immediate = .{
.immediate = .{
.imm = @intCast(u8, std.math.rotl(u32, x, 2 * i)),
.rotate = @intCast(u4, i),
},
@ -461,35 +461,67 @@ pub const Instruction = union(enum) {
/// instruction. Data can be loaded from memory with either an
/// immediate offset or an offset that is stored in some register.
pub const Offset = union(enum) {
Immediate: u12,
Register: packed struct {
immediate: u12,
register: packed struct {
rm: u4,
shift: u8,
fixed: u1 = 0b0,
stype: u2,
imm5: u5,
},
/// Shift to encode alongside the offset register in `Offset.reg`,
/// which turns each variant into the `stype` and `imm5` fields of
/// the `register` payload.
pub const Shift = union(enum) {
/// No shift; `reg` encodes this as stype 0b00 with imm5 = 0
/// (the same encoding as `.lsl` with an amount of 0)
none,
/// Logical shift left by the given amount (stype 0b00)
lsl: u5,
/// Logical shift right by the given amount (stype 0b01)
lsr: u5,
/// Arithmetic shift right by the given amount (stype 0b10)
asr: u5,
/// Rotate right by the given amount (stype 0b11)
/// NOTE(review): `.ror` with an amount of 0 yields the same
/// encoding as `.rrx` -- confirm callers never pass 0 here
ror: u5,
/// Rotate right one bit, with extend; shares stype 0b11 with
/// `ror` and is encoded with imm5 = 0
rrx,
};
pub const none = Offset{
.Immediate = 0,
.immediate = 0,
};
pub fn toU12(self: Offset) u12 {
return switch (self) {
.Register => |v| @bitCast(u12, v),
.Immediate => |v| v,
.register => |v| @bitCast(u12, v),
.immediate => |v| v,
};
}
pub fn reg(rm: Register, shift: u8) Offset {
pub fn reg(rm: Register, shift: Shift) Offset {
return Offset{
.Register = .{
.register = .{
.rm = rm.id(),
.shift = shift,
.stype = switch (shift) {
.none => 0b00,
.lsl => 0b00,
.lsr => 0b01,
.asr => 0b10,
.ror => 0b11,
.rrx => 0b11,
},
.imm5 = switch (shift) {
.none => 0,
.lsl => |n| n,
.lsr => |n| n,
.asr => |n| n,
.ror => |n| n,
.rrx => 0,
},
},
};
}
pub fn imm(immediate: u12) Offset {
return Offset{
.Immediate = immediate,
.immediate = immediate,
};
}
};
@ -567,7 +599,7 @@ pub const Instruction = union(enum) {
return Instruction{
.data_processing = .{
.cond = @enumToInt(cond),
.i = @boolToInt(op2 == .Immediate),
.i = @boolToInt(op2 == .immediate),
.opcode = @enumToInt(opcode),
.s = s,
.rn = rn.id(),
@ -681,7 +713,7 @@ pub const Instruction = union(enum) {
.byte_word = byte_word,
.up_down = @boolToInt(positive),
.pre_post = @boolToInt(pre_index),
.imm = @boolToInt(offset != .Immediate),
.imm = @boolToInt(offset != .immediate),
},
};
}