Merge pull request #7367 from kubkon/aarch64-stp-ldp

stage2: add aarch64 load/store pair of registers instructions
This commit is contained in:
Andrew Kelley 2020-12-09 14:31:08 -05:00 committed by GitHub
commit ae3fd86dcc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 272 additions and 153 deletions

View File

@ -2730,16 +2730,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// For MachO, the binary, with the exception of object files, has to be a PIE.
// Therefore we cannot load an absolute address.
// Instead, we need to make use of PC-relative addressing.
if (reg.id() == 0) { // x0 is special-cased
// TODO This needs to be optimised in the stack usage (perhaps use a shadow stack
// like described here:
// https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/using-the-stack-in-aarch64-implementing-push-and-pop)
// TODO As far as branching is concerned, instead of saving the return address
// in a register, I'm thinking here of immitating x86_64, and having the address
// passed on the stack.
if (reg.id() == 0) { // x0 is special-cased
// str x28, [sp, #-16]
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.x28, Register.sp, .{
.offset = Instruction.Offset.imm_pre_index(-16),
.offset = Instruction.LoadStoreOffset.imm_pre_index(-16),
}).toU32());
// adr x28, #8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.adr(.x28, 8).toU32());
@ -2755,21 +2752,24 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// b [label]
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(0).toU32());
// mov r, x0
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(reg, .x0, Instruction.RegisterShift.none()).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(
reg,
.x0,
Instruction.RegisterShift.none(),
).toU32());
// ldr x28, [sp], #16
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.x28, .{
.rn = Register.sp,
.offset = Instruction.Offset.imm_post_index(16),
.offset = Instruction.LoadStoreOffset.imm_post_index(16),
}).toU32());
} else {
// str x28, [sp, #-16]
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.x28, Register.sp, .{
.offset = Instruction.Offset.imm_pre_index(-16),
}).toU32());
// str x0, [sp, #-16]
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.x0, Register.sp, .{
.offset = Instruction.Offset.imm_pre_index(-16),
}).toU32());
// stp x0, x28, [sp, #-16]
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.stp(
.x0,
.x28,
Register.sp,
Instruction.LoadStorePairOffset.pre_index(-16),
).toU32());
// adr x28, #8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.adr(.x28, 8).toU32());
if (self.bin_file.cast(link.File.MachO)) |macho_file| {
@ -2784,17 +2784,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// b [label]
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(0).toU32());
// mov r, x0
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(reg, .x0, Instruction.RegisterShift.none()).toU32());
// ldr x0, [sp], #16
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.x0, .{
.rn = Register.sp,
.offset = Instruction.Offset.imm_post_index(16),
}).toU32());
// ldr x28, [sp], #16
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.x28, .{
.rn = Register.sp,
.offset = Instruction.Offset.imm_post_index(16),
}).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(
reg,
.x0,
Instruction.RegisterShift.none(),
).toU32());
// ldp x0, x28, [sp, #16]
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldp(
.x0,
.x28,
Register.sp,
Instruction.LoadStorePairOffset.post_index(16),
).toU32());
}
} else {
// The value is in memory at a hard-coded address.

View File

@ -19,7 +19,7 @@ pub const Register = enum(u6) {
w16, w17, w18, w19, w20, w21, w22, w23,
w24, w25, w26, w27, w28, w29, w30, wzr,
pub const sp = .xzr;
pub const sp = Register.xzr;
pub fn id(self: Register) u5 {
return @truncate(u5, @enumToInt(self));
@ -72,6 +72,9 @@ test "Register.id" {
testing.expectEqual(@as(u5, 31), Register.xzr.id());
testing.expectEqual(@as(u5, 31), Register.wzr.id());
testing.expectEqual(@as(u5, 31), Register.sp.id());
testing.expectEqual(@as(u5, 31), Register.sp.id());
}
test "Register.size" {
@ -232,6 +235,16 @@ pub const Instruction = union(enum) {
fixed: u4 = 0b111_0,
size: u2,
},
LoadStorePairOfRegisters: packed struct {
rt1: u5,
rn: u5,
rt2: u5,
imm7: u7,
load: u1,
encoding: u2,
fixed: u5 = 0b101_0_0,
opc: u2,
},
LoadLiteral: packed struct {
rt: u5,
imm19: u19,
@ -268,6 +281,7 @@ pub const Instruction = union(enum) {
.MoveWideImmediate => |v| @bitCast(u32, v),
.PCRelativeAddress => |v| @bitCast(u32, v),
.LoadStoreRegister => |v| @bitCast(u32, v),
.LoadStorePairOfRegisters => |v| @bitCast(u32, v),
.LoadLiteral => |v| @bitCast(u32, v),
.ExceptionGeneration => |v| @bitCast(u32, v),
.UnconditionalBranchRegister => |v| @bitCast(u32, v),
@ -276,123 +290,6 @@ pub const Instruction = union(enum) {
};
}
/// Represents the offset operand of a load or store instruction.
/// Data can be loaded from memory with either an immediate offset
/// or an offset that is stored in some register.
pub const Offset = union(enum) {
Immediate: union(enum) {
PostIndex: i9,
PreIndex: i9,
Unsigned: u12,
},
Register: struct {
rm: u5,
shift: union(enum) {
Uxtw: u2,
Lsl: u2,
Sxtw: u2,
Sxtx: u2,
},
},
pub const none = Offset{
.Immediate = .{ .Unsigned = 0 },
};
pub fn toU12(self: Offset) u12 {
return switch (self) {
.Immediate => |imm_type| switch (imm_type) {
.PostIndex => |v| (@intCast(u12, @bitCast(u9, v)) << 2) + 1,
.PreIndex => |v| (@intCast(u12, @bitCast(u9, v)) << 2) + 3,
.Unsigned => |v| v,
},
.Register => |r| switch (r.shift) {
.Uxtw => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 16 + 2050,
.Lsl => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 24 + 2050,
.Sxtw => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 48 + 2050,
.Sxtx => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 56 + 2050,
},
};
}
pub fn imm(offset: u12) Offset {
return Offset{
.Immediate = .{ .Unsigned = offset },
};
}
pub fn imm_post_index(offset: i9) Offset {
return Offset{
.Immediate = .{ .PostIndex = offset },
};
}
pub fn imm_pre_index(offset: i9) Offset {
return Offset{
.Immediate = .{ .PreIndex = offset },
};
}
pub fn reg(rm: Register) Offset {
return Offset{
.Register = .{
.rm = rm.id(),
.shift = .{
.Lsl = 0,
},
},
};
}
pub fn reg_uxtw(rm: Register, shift: u2) Offset {
assert(rm.size() == 32 and (shift == 0 or shift == 2));
return Offset{
.Register = .{
.rm = rm.id(),
.shift = .{
.Uxtw = shift,
},
},
};
}
pub fn reg_lsl(rm: Register, shift: u2) Offset {
assert(rm.size() == 64 and (shift == 0 or shift == 3));
return Offset{
.Register = .{
.rm = rm.id(),
.shift = .{
.Lsl = shift,
},
},
};
}
pub fn reg_sxtw(rm: Register, shift: u2) Offset {
assert(rm.size() == 32 and (shift == 0 or shift == 2));
return Offset{
.Register = .{
.rm = rm.id(),
.shift = .{
.Sxtw = shift,
},
},
};
}
pub fn reg_sxtx(rm: Register, shift: u2) Offset {
assert(rm.size() == 64 and (shift == 0 or shift == 3));
return Offset{
.Register = .{
.rm = rm.id(),
.shift = .{
.Sxtx = shift,
},
},
};
}
};
pub const RegisterShift = struct {
rn: u5,
imm6: u6,
@ -500,7 +397,124 @@ pub const Instruction = union(enum) {
};
}
fn loadStoreRegister(rt: Register, rn: Register, offset: Offset, load: bool) Instruction {
/// Represents the offset operand of a load or store instruction.
/// Data can be loaded from memory with either an immediate offset
/// or an offset that is stored in some register.
pub const LoadStoreOffset = union(enum) {
Immediate: union(enum) {
PostIndex: i9,
PreIndex: i9,
Unsigned: u12,
},
Register: struct {
rm: u5,
shift: union(enum) {
Uxtw: u2,
Lsl: u2,
Sxtw: u2,
Sxtx: u2,
},
},
pub const none = LoadStoreOffset{
.Immediate = .{ .Unsigned = 0 },
};
pub fn toU12(self: LoadStoreOffset) u12 {
return switch (self) {
.Immediate => |imm_type| switch (imm_type) {
.PostIndex => |v| (@intCast(u12, @bitCast(u9, v)) << 2) + 1,
.PreIndex => |v| (@intCast(u12, @bitCast(u9, v)) << 2) + 3,
.Unsigned => |v| v,
},
.Register => |r| switch (r.shift) {
.Uxtw => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 16 + 2050,
.Lsl => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 24 + 2050,
.Sxtw => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 48 + 2050,
.Sxtx => |v| (@intCast(u12, r.rm) << 6) + (@intCast(u12, v) << 2) + 56 + 2050,
},
};
}
pub fn imm(offset: u12) LoadStoreOffset {
return .{
.Immediate = .{ .Unsigned = offset },
};
}
pub fn imm_post_index(offset: i9) LoadStoreOffset {
return .{
.Immediate = .{ .PostIndex = offset },
};
}
pub fn imm_pre_index(offset: i9) LoadStoreOffset {
return .{
.Immediate = .{ .PreIndex = offset },
};
}
pub fn reg(rm: Register) LoadStoreOffset {
return .{
.Register = .{
.rm = rm.id(),
.shift = .{
.Lsl = 0,
},
},
};
}
pub fn reg_uxtw(rm: Register, shift: u2) LoadStoreOffset {
assert(rm.size() == 32 and (shift == 0 or shift == 2));
return .{
.Register = .{
.rm = rm.id(),
.shift = .{
.Uxtw = shift,
},
},
};
}
pub fn reg_lsl(rm: Register, shift: u2) LoadStoreOffset {
assert(rm.size() == 64 and (shift == 0 or shift == 3));
return .{
.Register = .{
.rm = rm.id(),
.shift = .{
.Lsl = shift,
},
},
};
}
pub fn reg_sxtw(rm: Register, shift: u2) LoadStoreOffset {
assert(rm.size() == 32 and (shift == 0 or shift == 2));
return .{
.Register = .{
.rm = rm.id(),
.shift = .{
.Sxtw = shift,
},
},
};
}
pub fn reg_sxtx(rm: Register, shift: u2) LoadStoreOffset {
assert(rm.size() == 64 and (shift == 0 or shift == 3));
return .{
.Register = .{
.rm = rm.id(),
.shift = .{
.Sxtx = shift,
},
},
};
}
};
fn loadStoreRegister(rt: Register, rn: Register, offset: LoadStoreOffset, load: bool) Instruction {
const off = offset.toU12();
const op1: u2 = blk: {
switch (offset) {
@ -542,6 +556,49 @@ pub const Instruction = union(enum) {
}
}
fn loadStorePairOfRegisters(
rt1: Register,
rt2: Register,
rn: Register,
offset: i9,
encoding: u2,
load: bool,
) Instruction {
switch (rt1.size()) {
32 => {
assert(-256 <= offset and offset <= 252);
const imm7 = @truncate(u7, @bitCast(u9, offset >> 2));
return Instruction{
.LoadStorePairOfRegisters = .{
.rt1 = rt1.id(),
.rn = rn.id(),
.rt2 = rt2.id(),
.imm7 = imm7,
.load = @boolToInt(load),
.encoding = encoding,
.opc = 0b00,
},
};
},
64 => {
assert(-512 <= offset and offset <= 504);
const imm7 = @truncate(u7, @bitCast(u9, offset >> 3));
return Instruction{
.LoadStorePairOfRegisters = .{
.rt1 = rt1.id(),
.rn = rn.id(),
.rt2 = rt2.id(),
.imm7 = imm7,
.load = @boolToInt(load),
.encoding = encoding,
.opc = 0b10,
},
};
},
else => unreachable, // unexpected register size
}
}
fn loadLiteral(rt: Register, imm19: u19) Instruction {
switch (rt.size()) {
32 => {
@ -652,9 +709,10 @@ pub const Instruction = union(enum) {
pub const LdrArgs = struct {
rn: ?Register = null,
offset: Offset = Offset.none,
offset: LoadStoreOffset = LoadStoreOffset.none,
literal: ?u19 = null,
};
pub fn ldr(rt: Register, args: LdrArgs) Instruction {
if (args.rn) |rn| {
return loadStoreRegister(rt, rn, args.offset, true);
@ -664,12 +722,56 @@ pub const Instruction = union(enum) {
}
pub const StrArgs = struct {
offset: Offset = Offset.none,
offset: LoadStoreOffset = LoadStoreOffset.none,
};
pub fn str(rt: Register, rn: Register, args: StrArgs) Instruction {
return loadStoreRegister(rt, rn, args.offset, false);
}
// Load or store pair of registers
pub const LoadStorePairOffset = struct {
encoding: enum(u2) {
PostIndex = 0b01,
Signed = 0b10,
PreIndex = 0b11,
},
offset: i9,
pub fn none() LoadStorePairOffset {
return .{ .encoding = .Signed, .offset = 0 };
}
pub fn post_index(imm: i9) LoadStorePairOffset {
return .{ .encoding = .PostIndex, .offset = imm };
}
pub fn pre_index(imm: i9) LoadStorePairOffset {
return .{ .encoding = .PreIndex, .offset = imm };
}
pub fn signed(imm: i9) LoadStorePairOffset {
return .{ .encoding = .Signed, .offset = imm };
}
};
pub fn ldp(rt1: Register, rt2: Register, rn: Register, offset: LoadStorePairOffset) Instruction {
return loadStorePairOfRegisters(rt1, rt2, rn, offset.offset, @enumToInt(offset.encoding), true);
}
pub fn ldnp(rt1: Register, rt2: Register, rn: Register, offset: i9) Instruction {
return loadStorePairOfRegisters(rt1, rt2, rn, offset, 0, true);
}
pub fn stp(rt1: Register, rt2: Register, rn: Register, offset: LoadStorePairOffset) Instruction {
return loadStorePairOfRegisters(rt1, rt2, rn, offset.offset, @enumToInt(offset.encoding), false);
}
pub fn stnp(rt1: Register, rt2: Register, rn: Register, offset: i9) Instruction {
return loadStorePairOfRegisters(rt1, rt2, rn, offset, 0, false);
}
// Exception generation
pub fn svc(imm16: u16) Instruction {
@ -787,15 +889,15 @@ test "serialize instructions" {
.expected = 0b11_111_0_01_01_000000000000_00001_00010,
},
.{ // ldr x2, [x1, #1]!
.inst = Instruction.ldr(.x2, .{ .rn = .x1, .offset = Instruction.Offset.imm_pre_index(1) }),
.inst = Instruction.ldr(.x2, .{ .rn = .x1, .offset = Instruction.LoadStoreOffset.imm_pre_index(1) }),
.expected = 0b11_111_0_00_01_0_000000001_11_00001_00010,
},
.{ // ldr x2, [x1], #-1
.inst = Instruction.ldr(.x2, .{ .rn = .x1, .offset = Instruction.Offset.imm_post_index(-1) }),
.inst = Instruction.ldr(.x2, .{ .rn = .x1, .offset = Instruction.LoadStoreOffset.imm_post_index(-1) }),
.expected = 0b11_111_0_00_01_0_111111111_01_00001_00010,
},
.{ // ldr x2, [x1], (x3)
.inst = Instruction.ldr(.x2, .{ .rn = .x1, .offset = Instruction.Offset.reg(.x3) }),
.inst = Instruction.ldr(.x2, .{ .rn = .x1, .offset = Instruction.LoadStoreOffset.reg(.x3) }),
.expected = 0b11_111_0_00_01_1_00011_011_0_10_00001_00010,
},
.{ // ldr x2, label
@ -807,7 +909,7 @@ test "serialize instructions" {
.expected = 0b11_111_0_01_00_000000000000_00001_00010,
},
.{ // str x2, [x1], (x3)
.inst = Instruction.str(.x2, .x1, .{ .offset = Instruction.Offset.reg(.x3) }),
.inst = Instruction.str(.x2, .x1, .{ .offset = Instruction.LoadStoreOffset.reg(.x3) }),
.expected = 0b11_111_0_00_00_1_00011_011_0_10_00001_00010,
},
.{ // adr x2, #0x8
@ -826,6 +928,22 @@ test "serialize instructions" {
.inst = Instruction.adrp(.x2, -0x8),
.expected = 0b1_00_10000_1111111111111111110_00010,
},
.{ // stp x1, x2, [sp, #8]
.inst = Instruction.stp(.x1, .x2, Register.sp, Instruction.LoadStorePairOffset.signed(8)),
.expected = 0b10_101_0_010_0_0000001_00010_11111_00001,
},
.{ // ldp x1, x2, [sp, #8]
.inst = Instruction.ldp(.x1, .x2, Register.sp, Instruction.LoadStorePairOffset.signed(8)),
.expected = 0b10_101_0_010_1_0000001_00010_11111_00001,
},
.{ // stp x1, x2, [sp, #-16]!
.inst = Instruction.stp(.x1, .x2, Register.sp, Instruction.LoadStorePairOffset.pre_index(-16)),
.expected = 0b10_101_0_011_0_1111110_00010_11111_00001,
},
.{ // ldp x1, x2, [sp], #16
.inst = Instruction.ldp(.x1, .x2, Register.sp, Instruction.LoadStorePairOffset.post_index(16)),
.expected = 0b10_101_0_001_1_0000010_00010_11111_00001,
},
};
for (testcases) |case| {