stage2 ARM: better immediate loading feat. movw and movt

This commit is contained in:
joachimschmidt557 2020-10-11 09:57:25 +02:00
parent 59af275680
commit 7391087df1
2 changed files with 83 additions and 27 deletions

View File

@ -2274,35 +2274,39 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa });
},
.immediate => |x| {
// TODO better analysis of x to determine the
// least amount of necessary instructions (use
// more intelligent rotating)
if (x <= math.maxInt(u8)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
return;
} else if (x <= math.maxInt(u16)) {
// TODO Use movw Note: Not supported on
// all ARM targets!
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
} else if (x <= math.maxInt(u32)) {
// TODO Use movw and movt Note: Not
// supported on all ARM targets! Also TODO
// write constant to code and load
// relative to pc
if (x > math.maxInt(u32)) return self.fail(src, "ARM registers are 32-bit wide", .{});
// immediate: 0xaabbccdd
// mov reg, #0xdd
// orr reg, reg, #0xcc, 24
// orr reg, reg, #0xbb, 16
// orr reg, reg, #0xaa, 8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
return;
if (Instruction.Operand.fromU32(@intCast(u32, x))) |op| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, op).toU32());
} else if (Instruction.Operand.fromU32(~@intCast(u32, x))) |op| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mvn(.al, reg, op).toU32());
} else if (x <= math.maxInt(u16)) {
if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @intCast(u16, x)).toU32());
} else {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
}
} else {
return self.fail(src, "ARM registers are 32-bit wide", .{});
// TODO write constant to code and load
// relative to pc
if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
// immediate: 0xaaaabbbb
// movw reg, #0xbbbb
// movt reg, #0xaaaa
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @truncate(u16, x)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movt(.al, reg, @truncate(u16, x >> 16)).toU32());
} else {
// immediate: 0xaabbccdd
// mov reg, #0xdd
// orr reg, reg, #0xcc, 24
// orr reg, reg, #0xbb, 16
// orr reg, reg, #0xaa, 8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
}
}
},
.register => |src_reg| {

View File

@ -317,6 +317,29 @@ pub const Instruction = union(enum) {
},
};
}
/// Attempts to express `x` as an immediate operand, i.e. an 8-bit
/// value rotated right by an even number of bits (0, 2, ..., 30).
/// Rotations are tried in ascending order and the first match is
/// returned. Returns null when no rotation can represent `x`.
pub fn fromU32(x: u32) ?Operand {
    // windows[n] has exactly the bits set that an 8-bit value occupies
    // after being rotated right by 2 * n. Built once at compile time.
    const windows = comptime init: {
        const low_byte: u32 = std.math.maxInt(u8);
        var table = [_]u32{0} ** 16;
        for (table) |*window, rotation| {
            window.* = std.math.rotr(u32, low_byte, 2 * rotation);
        }
        break :init table;
    };

    for (windows) |window, rotation| {
        // Any bit of `x` outside the window rules this rotation out.
        if ((x & ~window) != 0) continue;

        // Rotating left by the same amount undoes the right rotation
        // and brings the payload back into the low byte.
        return Operand{
            .Immediate = .{
                .imm = @intCast(u8, std.math.rotl(u32, x, 2 * rotation)),
                .rotate = @intCast(u4, rotation),
            },
        };
    }
    return null;
}
};
/// Represents the offset operand of a load or store
@ -412,6 +435,25 @@ pub const Instruction = union(enum) {
};
}
/// Shared encoder behind `movw` and `movt`: packs a 16-bit immediate
/// move into the `DataProcessing` instruction layout.
///
/// `top` picks the opcode: 0b1010 when true, 0b1000 when false.
/// The immediate flag `i` is always set and `s` is always cleared.
fn specialMov(
    cond: Condition,
    rd: Register,
    imm: u16,
    top: bool,
) Instruction {
    // The 16-bit immediate does not fit into `op2` alone: its upper
    // four bits are stored in the `rn` field, the lower twelve in `op2`.
    const imm_upper4 = @truncate(u4, imm >> 12);
    const imm_lower12 = @truncate(u12, imm);

    const encoded = Instruction{
        .DataProcessing = .{
            .cond = @enumToInt(cond),
            .i = 1,
            .opcode = if (top) 0b1010 else 0b1000,
            .s = 0,
            .rn = imm_upper4,
            .rd = rd.id(),
            .op2 = imm_lower12,
        },
    };
    return encoded;
}
fn singleDataTransfer(
cond: Condition,
rd: Register,
@ -618,6 +660,16 @@ pub const Instruction = union(enum) {
return dataProcessing(cond, .mvn, 1, rd, .r0, op2);
}
// movw and movt
/// Encodes `movw rd, #imm` by delegating to `specialMov` with
/// `top` set to false.
pub fn movw(cond: Condition, rd: Register, imm: u16) Instruction {
    const top = false;
    return specialMov(cond, rd, imm, top);
}
/// Encodes `movt rd, #imm` by delegating to `specialMov` with
/// `top` set to true.
pub fn movt(cond: Condition, rd: Register, imm: u16) Instruction {
    const top = true;
    return specialMov(cond, rd, imm, top);
}
// PSR transfer
pub fn mrs(cond: Condition, rd: Register, psr: Psr) Instruction {