mirror of
https://github.com/ziglang/zig.git
synced 2026-01-09 08:55:36 +00:00
stage2 ARM: optimize airSliceElemVal for elem_size 1 or 4
In these cases, the AIR inst can be lowered to only one ldr instruction. Also fixes shifts in arm.bits.Offset
This commit is contained in:
parent
71321b6941
commit
04f379dd41
@ -1222,9 +1222,16 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
|
||||
fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const is_volatile = false; // TODO
|
||||
const bin_op = self.air.instructions.items(.data)[inst].bin_op;
|
||||
const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) .dead else result: {
|
||||
|
||||
if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
|
||||
const result: MCValue = result: {
|
||||
const slice_mcv = try self.resolveInst(bin_op.lhs);
|
||||
|
||||
// TODO optimize for the case where the index is a constant,
|
||||
// i.e. index_mcv == .immediate
|
||||
const index_mcv = try self.resolveInst(bin_op.rhs);
|
||||
const index_is_register = index_mcv == .register;
|
||||
|
||||
const slice_ty = self.air.typeOf(bin_op.lhs);
|
||||
const elem_ty = slice_ty.childType();
|
||||
const elem_size = elem_ty.abiSize(self.target.*);
|
||||
@ -1232,12 +1239,8 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
|
||||
var buf: Type.SlicePtrFieldTypeBuffer = undefined;
|
||||
const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
|
||||
|
||||
// TODO optimize this for the case when elem_size is a power
|
||||
// of two (includes elem_size == 1)
|
||||
const offset_mcv = try self.genArmMulConstant(inst, bin_op.rhs, 1, @intCast(u32, elem_size));
|
||||
assert(offset_mcv == .register); // result of multiplication should always be register
|
||||
self.register_manager.freezeRegs(&.{offset_mcv.register});
|
||||
defer self.register_manager.unfreezeRegs(&.{offset_mcv.register});
|
||||
if (index_is_register) self.register_manager.freezeRegs(&.{index_mcv.register});
|
||||
defer if (index_is_register) self.register_manager.unfreezeRegs(&.{index_mcv.register});
|
||||
|
||||
const base_mcv: MCValue = switch (slice_mcv) {
|
||||
.stack_offset => .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, slice_mcv) },
|
||||
@ -1246,61 +1249,67 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
|
||||
self.register_manager.freezeRegs(&.{base_mcv.register});
|
||||
defer self.register_manager.unfreezeRegs(&.{base_mcv.register});
|
||||
|
||||
if (elem_size <= 4) {
|
||||
const dst_reg = try self.register_manager.allocReg(inst);
|
||||
self.register_manager.freezeRegs(&.{dst_reg});
|
||||
defer self.register_manager.unfreezeRegs(&.{dst_reg});
|
||||
switch (elem_size) {
|
||||
1, 4 => {
|
||||
const dst_reg = try self.register_manager.allocReg(inst);
|
||||
const dst_mcv = MCValue{ .register = dst_reg };
|
||||
self.register_manager.freezeRegs(&.{dst_reg});
|
||||
defer self.register_manager.unfreezeRegs(&.{dst_reg});
|
||||
|
||||
switch (elem_size) {
|
||||
1, 4 => {
|
||||
const tag: Mir.Inst.Tag = switch (elem_size) {
|
||||
1 => .ldrb,
|
||||
4 => .ldr,
|
||||
else => unreachable,
|
||||
};
|
||||
const index_reg: Register = switch (index_mcv) {
|
||||
.register => |reg| reg,
|
||||
else => try self.copyToTmpRegister(Type.usize, index_mcv),
|
||||
};
|
||||
self.register_manager.freezeRegs(&.{index_reg});
|
||||
defer self.register_manager.unfreezeRegs(&.{index_reg});
|
||||
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.data = .{ .rr_offset = .{
|
||||
.rt = dst_reg,
|
||||
.rn = base_mcv.register,
|
||||
.offset = .{ .offset = Instruction.Offset.reg(offset_mcv.register, 0) },
|
||||
} },
|
||||
});
|
||||
},
|
||||
2 => {
|
||||
_ = try self.addInst(.{
|
||||
.tag = .ldrh,
|
||||
.data = .{ .rr_extra_offset = .{
|
||||
.rt = dst_reg,
|
||||
.rn = base_mcv.register,
|
||||
.offset = .{ .offset = Instruction.ExtraLoadStoreOffset.reg(offset_mcv.register) },
|
||||
} },
|
||||
});
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
const tag: Mir.Inst.Tag = switch (elem_size) {
|
||||
1 => .ldrb,
|
||||
4 => .ldr,
|
||||
else => unreachable,
|
||||
};
|
||||
const shift: u5 = switch (elem_size) {
|
||||
1 => 0,
|
||||
4 => 2,
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
break :result MCValue{ .register = dst_reg };
|
||||
} else {
|
||||
const dst_mcv = try self.allocRegOrMem(inst, false);
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.data = .{ .rr_offset = .{
|
||||
.rt = dst_reg,
|
||||
.rn = base_mcv.register,
|
||||
.offset = .{ .offset = Instruction.Offset.reg(index_reg, .{ .lsl = shift }) },
|
||||
} },
|
||||
});
|
||||
|
||||
const addr_reg = try self.register_manager.allocReg(null);
|
||||
self.register_manager.freezeRegs(&.{addr_reg});
|
||||
defer self.register_manager.unfreezeRegs(&.{addr_reg});
|
||||
break :result dst_mcv;
|
||||
},
|
||||
else => {
|
||||
const dst_mcv = try self.allocRegOrMem(inst, true);
|
||||
|
||||
try self.genArmBinOpCode(addr_reg, base_mcv, offset_mcv, false, .add, .unsigned);
|
||||
const offset_mcv = try self.genArmMulConstant(bin_op.rhs, @intCast(u32, elem_size));
|
||||
assert(offset_mcv == .register); // result of multiplication should always be register
|
||||
self.register_manager.freezeRegs(&.{offset_mcv.register});
|
||||
defer self.register_manager.unfreezeRegs(&.{offset_mcv.register});
|
||||
|
||||
// I know we will unfreeze these registers at the end of
|
||||
// the scope of :result. However, at this point in time,
|
||||
// neither the base register nor the offset register
|
||||
// contains any valuable data anymore. In order to reduce
|
||||
// register pressure, unfreeze them prematurely
|
||||
self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
|
||||
const addr_reg = try self.register_manager.allocReg(null);
|
||||
self.register_manager.freezeRegs(&.{addr_reg});
|
||||
defer self.register_manager.unfreezeRegs(&.{addr_reg});
|
||||
|
||||
try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
|
||||
try self.genArmBinOpCode(addr_reg, base_mcv, offset_mcv, false, .add, .unsigned);
|
||||
|
||||
break :result dst_mcv;
|
||||
// I know we will unfreeze these registers at the end of
|
||||
// the scope of :result. However, at this point in time,
|
||||
// neither the base register nor the offset register
|
||||
// contains any valuable data anymore. In order to reduce
|
||||
// register pressure, unfreeze them prematurely
|
||||
self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
|
||||
|
||||
try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
|
||||
|
||||
break :result dst_mcv;
|
||||
},
|
||||
}
|
||||
};
|
||||
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
|
||||
@ -1931,8 +1940,8 @@ fn genArmBinOpCode(
|
||||
.shl, .shr => {
|
||||
assert(!swap_lhs_and_rhs);
|
||||
const shift_amount = switch (operand) {
|
||||
.Register => |reg_op| Instruction.ShiftAmount.reg(@intToEnum(Register, reg_op.rm)),
|
||||
.Immediate => |imm_op| Instruction.ShiftAmount.imm(@intCast(u5, imm_op.imm)),
|
||||
.register => |reg_op| Instruction.ShiftAmount.reg(@intToEnum(Register, reg_op.rm)),
|
||||
.immediate => |imm_op| Instruction.ShiftAmount.imm(@intCast(u5, imm_op.imm)),
|
||||
};
|
||||
|
||||
const tag: Mir.Inst.Tag = switch (op) {
|
||||
@ -2036,12 +2045,11 @@ fn genArmMul(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Ai
|
||||
return dst_mcv;
|
||||
}
|
||||
|
||||
fn genArmMulConstant(self: *Self, inst: Air.Inst.Index, op: Air.Inst.Ref, op_index: Liveness.OperandInt, imm: u32) !MCValue {
|
||||
fn genArmMulConstant(self: *Self, op: Air.Inst.Ref, imm: u32) !MCValue {
|
||||
const lhs = try self.resolveInst(op);
|
||||
const rhs = MCValue{ .immediate = imm };
|
||||
|
||||
const lhs_is_register = lhs == .register;
|
||||
const reuse_lhs = lhs_is_register and self.reuseOperand(inst, op, op_index, lhs);
|
||||
|
||||
if (lhs_is_register) self.register_manager.freezeRegs(&.{lhs.register});
|
||||
defer if (lhs_is_register) self.register_manager.unfreezeRegs(&.{lhs.register});
|
||||
@ -2054,23 +2062,17 @@ fn genArmMulConstant(self: *Self, inst: Air.Inst.Index, op: Air.Inst.Ref, op_ind
|
||||
var rhs_mcv: MCValue = rhs;
|
||||
|
||||
// Allocate registers for operands and/or destination
|
||||
if (reuse_lhs) {
|
||||
// Allocate 1 register
|
||||
rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
|
||||
dst_mcv = lhs;
|
||||
// Allocate 1 or 2 registers
|
||||
if (lhs_is_register) {
|
||||
// Move RHS to register
|
||||
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
|
||||
rhs_mcv = dst_mcv;
|
||||
} else {
|
||||
// Allocate 1 or 2 registers
|
||||
if (lhs_is_register) {
|
||||
// Move RHS to register
|
||||
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
|
||||
rhs_mcv = dst_mcv;
|
||||
} else {
|
||||
// Move LHS and RHS to register
|
||||
const regs = try self.register_manager.allocRegs(2, .{ null, null });
|
||||
lhs_mcv = MCValue{ .register = regs[0] };
|
||||
rhs_mcv = MCValue{ .register = regs[1] };
|
||||
dst_mcv = lhs_mcv;
|
||||
}
|
||||
// Move LHS and RHS to register
|
||||
const regs = try self.register_manager.allocRegs(2, .{ null, null });
|
||||
lhs_mcv = MCValue{ .register = regs[0] };
|
||||
rhs_mcv = MCValue{ .register = regs[1] };
|
||||
dst_mcv = lhs_mcv;
|
||||
}
|
||||
|
||||
// Move the operands to the newly allocated registers
|
||||
@ -2132,7 +2134,7 @@ fn genArmInlineMemcpy(
|
||||
.data = .{ .rr_offset = .{
|
||||
.rt = tmp,
|
||||
.rn = src,
|
||||
.offset = .{ .offset = Instruction.Offset.reg(count, 0) },
|
||||
.offset = .{ .offset = Instruction.Offset.reg(count, .none) },
|
||||
} },
|
||||
});
|
||||
|
||||
@ -2142,7 +2144,7 @@ fn genArmInlineMemcpy(
|
||||
.data = .{ .rr_offset = .{
|
||||
.rt = tmp,
|
||||
.rn = dst,
|
||||
.offset = .{ .offset = Instruction.Offset.reg(count, 0) },
|
||||
.offset = .{ .offset = Instruction.Offset.reg(count, .none) },
|
||||
} },
|
||||
});
|
||||
|
||||
@ -3126,7 +3128,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
|
||||
1, 4 => {
|
||||
const offset = if (math.cast(u12, adj_off)) |imm| blk: {
|
||||
break :blk Instruction.Offset.imm(imm);
|
||||
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
|
||||
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
|
||||
|
||||
const tag: Mir.Inst.Tag = switch (abi_size) {
|
||||
1 => .strb,
|
||||
@ -3450,7 +3452,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
|
||||
1, 4 => {
|
||||
const offset = if (adj_off <= math.maxInt(u12)) blk: {
|
||||
break :blk Instruction.Offset.imm(@intCast(u12, adj_off));
|
||||
} else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
|
||||
} else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
|
||||
|
||||
const tag: Mir.Inst.Tag = switch (abi_size) {
|
||||
1 => .ldrb,
|
||||
@ -3536,7 +3538,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I
|
||||
1, 4 => {
|
||||
const offset = if (math.cast(u12, adj_off)) |imm| blk: {
|
||||
break :blk Instruction.Offset.imm(imm);
|
||||
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
|
||||
} else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
|
||||
|
||||
const tag: Mir.Inst.Tag = switch (abi_size) {
|
||||
1 => .strb,
|
||||
|
||||
@ -343,11 +343,11 @@ pub const Instruction = union(enum) {
|
||||
/// which can either be content from a register or an immediate
|
||||
/// value
|
||||
pub const Operand = union(enum) {
|
||||
Register: packed struct {
|
||||
register: packed struct {
|
||||
rm: u4,
|
||||
shift: u8,
|
||||
},
|
||||
Immediate: packed struct {
|
||||
immediate: packed struct {
|
||||
imm: u8,
|
||||
rotate: u4,
|
||||
},
|
||||
@ -356,12 +356,12 @@ pub const Instruction = union(enum) {
|
||||
/// register can be shifted by a specific immediate value or
|
||||
/// by the contents of another register
|
||||
pub const Shift = union(enum) {
|
||||
Immediate: packed struct {
|
||||
immediate: packed struct {
|
||||
fixed: u1 = 0b0,
|
||||
typ: u2,
|
||||
amount: u5,
|
||||
},
|
||||
Register: packed struct {
|
||||
register: packed struct {
|
||||
fixed_1: u1 = 0b1,
|
||||
typ: u2,
|
||||
fixed_2: u1 = 0b0,
|
||||
@ -376,7 +376,7 @@ pub const Instruction = union(enum) {
|
||||
};
|
||||
|
||||
pub const none = Shift{
|
||||
.Immediate = .{
|
||||
.immediate = .{
|
||||
.amount = 0,
|
||||
.typ = 0,
|
||||
},
|
||||
@ -384,14 +384,14 @@ pub const Instruction = union(enum) {
|
||||
|
||||
pub fn toU8(self: Shift) u8 {
|
||||
return switch (self) {
|
||||
.Register => |v| @bitCast(u8, v),
|
||||
.Immediate => |v| @bitCast(u8, v),
|
||||
.register => |v| @bitCast(u8, v),
|
||||
.immediate => |v| @bitCast(u8, v),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn reg(rs: Register, typ: Type) Shift {
|
||||
return Shift{
|
||||
.Register = .{
|
||||
.register = .{
|
||||
.rs = rs.id(),
|
||||
.typ = @enumToInt(typ),
|
||||
},
|
||||
@ -400,7 +400,7 @@ pub const Instruction = union(enum) {
|
||||
|
||||
pub fn imm(amount: u5, typ: Type) Shift {
|
||||
return Shift{
|
||||
.Immediate = .{
|
||||
.immediate = .{
|
||||
.amount = amount,
|
||||
.typ = @enumToInt(typ),
|
||||
},
|
||||
@ -410,14 +410,14 @@ pub const Instruction = union(enum) {
|
||||
|
||||
pub fn toU12(self: Operand) u12 {
|
||||
return switch (self) {
|
||||
.Register => |v| @bitCast(u12, v),
|
||||
.Immediate => |v| @bitCast(u12, v),
|
||||
.register => |v| @bitCast(u12, v),
|
||||
.immediate => |v| @bitCast(u12, v),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn reg(rm: Register, shift: Shift) Operand {
|
||||
return Operand{
|
||||
.Register = .{
|
||||
.register = .{
|
||||
.rm = rm.id(),
|
||||
.shift = shift.toU8(),
|
||||
},
|
||||
@ -426,7 +426,7 @@ pub const Instruction = union(enum) {
|
||||
|
||||
pub fn imm(immediate: u8, rotate: u4) Operand {
|
||||
return Operand{
|
||||
.Immediate = .{
|
||||
.immediate = .{
|
||||
.imm = immediate,
|
||||
.rotate = rotate,
|
||||
},
|
||||
@ -447,7 +447,7 @@ pub const Instruction = union(enum) {
|
||||
return for (masks) |mask, i| {
|
||||
if (x & mask == x) {
|
||||
break Operand{
|
||||
.Immediate = .{
|
||||
.immediate = .{
|
||||
.imm = @intCast(u8, std.math.rotl(u32, x, 2 * i)),
|
||||
.rotate = @intCast(u4, i),
|
||||
},
|
||||
@ -461,35 +461,67 @@ pub const Instruction = union(enum) {
|
||||
/// instruction. Data can be loaded from memory with either an
|
||||
/// immediate offset or an offset that is stored in some register.
|
||||
pub const Offset = union(enum) {
|
||||
Immediate: u12,
|
||||
Register: packed struct {
|
||||
immediate: u12,
|
||||
register: packed struct {
|
||||
rm: u4,
|
||||
shift: u8,
|
||||
fixed: u1 = 0b0,
|
||||
stype: u2,
|
||||
imm5: u5,
|
||||
},
|
||||
|
||||
pub const Shift = union(enum) {
|
||||
/// No shift
|
||||
none,
|
||||
/// Logical shift left
|
||||
lsl: u5,
|
||||
/// Logical shift right
|
||||
lsr: u5,
|
||||
/// Arithmetic shift right
|
||||
asr: u5,
|
||||
/// Rotate right
|
||||
ror: u5,
|
||||
/// Rotate right one bit, with extend
|
||||
rrx,
|
||||
};
|
||||
|
||||
pub const none = Offset{
|
||||
.Immediate = 0,
|
||||
.immediate = 0,
|
||||
};
|
||||
|
||||
pub fn toU12(self: Offset) u12 {
|
||||
return switch (self) {
|
||||
.Register => |v| @bitCast(u12, v),
|
||||
.Immediate => |v| v,
|
||||
.register => |v| @bitCast(u12, v),
|
||||
.immediate => |v| v,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn reg(rm: Register, shift: u8) Offset {
|
||||
pub fn reg(rm: Register, shift: Shift) Offset {
|
||||
return Offset{
|
||||
.Register = .{
|
||||
.register = .{
|
||||
.rm = rm.id(),
|
||||
.shift = shift,
|
||||
.stype = switch (shift) {
|
||||
.none => 0b00,
|
||||
.lsl => 0b00,
|
||||
.lsr => 0b01,
|
||||
.asr => 0b10,
|
||||
.ror => 0b11,
|
||||
.rrx => 0b11,
|
||||
},
|
||||
.imm5 = switch (shift) {
|
||||
.none => 0,
|
||||
.lsl => |n| n,
|
||||
.lsr => |n| n,
|
||||
.asr => |n| n,
|
||||
.ror => |n| n,
|
||||
.rrx => 0,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
pub fn imm(immediate: u12) Offset {
|
||||
return Offset{
|
||||
.Immediate = immediate,
|
||||
.immediate = immediate,
|
||||
};
|
||||
}
|
||||
};
|
||||
@ -567,7 +599,7 @@ pub const Instruction = union(enum) {
|
||||
return Instruction{
|
||||
.data_processing = .{
|
||||
.cond = @enumToInt(cond),
|
||||
.i = @boolToInt(op2 == .Immediate),
|
||||
.i = @boolToInt(op2 == .immediate),
|
||||
.opcode = @enumToInt(opcode),
|
||||
.s = s,
|
||||
.rn = rn.id(),
|
||||
@ -681,7 +713,7 @@ pub const Instruction = union(enum) {
|
||||
.byte_word = byte_word,
|
||||
.up_down = @boolToInt(positive),
|
||||
.pre_post = @boolToInt(pre_index),
|
||||
.imm = @boolToInt(offset != .Immediate),
|
||||
.imm = @boolToInt(offset != .immediate),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user