x86_64: allow for any index register in complex SIB encodings

This relieves register pressure and reduces generated code size,
since we can now use the same index register for both `mov_scale_src`
and `mov_scale_dst` MIR instructions.

Also fix lowering of ModRM + SIB encodings where the index register is
extended: previously, we would silently ignore this and generate
incorrect encodings (the REX.X bit was never set for the index
register).
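
For context, a minimal sketch (not part of this commit) of how the REX prefix bits map onto a SIB-encoded memory operand; the `Register.isExtended` helper is assumed to behave like the one used in the emitter below, returning true for r8-r15:

    // REX prefix layout: 0b0100_WRXB.
    // W = 64-bit operand size, R = extends ModRM.reg,
    // X = extends SIB.index, B = extends ModRM.rm / SIB.base.
    fn rexForSibOperand(wide: bool, base: Register, index: ?Register) u8 {
        var rex: u8 = 0b0100_0000;
        if (wide) rex |= 0b1000; // REX.W
        if (base.isExtended()) rex |= 0b0001; // REX.B extends SIB.base
        if (index) |idx| {
            // The bug fixed here: without REX.X, an index of .r9
            // emits the same 3 bits as .rcx, addressing the wrong register.
            if (idx.isExtended()) rex |= 0b0010; // REX.X extends SIB.index
        }
        return rex;
    }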
Jakub Konka 2022-09-03 20:36:01 +02:00
parent 619d82234e
commit e0167ae0e3
3 changed files with 161 additions and 113 deletions


@@ -5932,7 +5932,6 @@ const InlineMemcpyOpts = struct {
dest_stack_base: ?Register = null,
};
/// Spills .rax and .rcx.
fn genInlineMemcpy(
self: *Self,
dst_ptr: MCValue,
@@ -5940,19 +5939,6 @@ fn genInlineMemcpy(
len: MCValue,
opts: InlineMemcpyOpts,
) InnerError!void {
// TODO: Preserve contents of .rax and .rcx if not free and locked, and then restore
// How can we do this without context if the value inside .rax or .rcx we preserve contains
// value needed to perform the memcpy in the first place?
// I think we should have an accumulator-based context that we pass with each subsequent helper
// call until we resolve the entire instruction.
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rcx });
defer for (reg_locks) |lock| {
self.register_manager.unlockReg(lock);
};
const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg|
self.register_manager.lockReg(reg)
else
@@ -5965,7 +5951,13 @@ fn genInlineMemcpy(
null;
defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock);
const dst_addr_reg = try self.register_manager.allocReg(null, gp);
const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp);
const dst_addr_reg = regs[0];
const src_addr_reg = regs[1];
const index_reg = regs[2].to64();
const count_reg = regs[3].to64();
const tmp_reg = regs[4].to8();
switch (dst_ptr) {
.memory,
.got_load,
@@ -5998,10 +5990,7 @@ fn genInlineMemcpy(
return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr});
},
}
const dst_addr_reg_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg);
defer self.register_manager.unlockReg(dst_addr_reg_lock);
const src_addr_reg = try self.register_manager.allocReg(null, gp);
switch (src_ptr) {
.memory,
.got_load,
@@ -6034,26 +6023,13 @@ fn genInlineMemcpy(
return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr});
},
}
const src_addr_reg_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
defer self.register_manager.unlockReg(src_addr_reg_lock);
const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp);
const count_reg = regs[0].to64();
const tmp_reg = regs[1].to8();
try self.genSetReg(Type.usize, count_reg, len);
// mov rcx, 0
// mov index_reg, 0
_ = try self.addInst(.{
.tag = .mov,
.ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }),
.data = .{ .imm = 0 },
});
// mov rax, 0
_ = try self.addInst(.{
.tag = .mov,
.ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }),
.ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }),
.data = .{ .imm = 0 },
});
@@ -6075,37 +6051,30 @@ fn genInlineMemcpy(
} },
});
// mov tmp, [addr + rcx]
// mov tmp, [addr + index_reg]
_ = try self.addInst(.{
.tag = .mov_scale_src,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = tmp_reg.to8(),
.reg2 = src_addr_reg,
}),
.data = .{ .imm = 0 },
.data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) },
});
// mov [stack_offset + rax], tmp
// mov [stack_offset + index_reg], tmp
_ = try self.addInst(.{
.tag = .mov_scale_dst,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = dst_addr_reg,
.reg2 = tmp_reg.to8(),
}),
.data = .{ .imm = 0 },
.data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) },
});
// add rcx, 1
// add index_reg, 1
_ = try self.addInst(.{
.tag = .add,
.ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }),
.data = .{ .imm = 1 },
});
// add rax, 1
_ = try self.addInst(.{
.tag = .add,
.ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }),
.ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }),
.data = .{ .imm = 1 },
});
@@ -6127,7 +6096,6 @@ fn genInlineMemcpy(
try self.performReloc(loop_reloc);
}
/// Spills .rax register.
fn genInlineMemset(
self: *Self,
dst_ptr: MCValue,
@@ -6135,12 +6103,22 @@ fn genInlineMemset(
len: MCValue,
opts: InlineMemcpyOpts,
) InnerError!void {
// TODO preserve contents of .rax and then restore
try self.register_manager.getReg(.rax, null);
const rax_lock = self.register_manager.lockRegAssumeUnused(.rax);
defer self.register_manager.unlockReg(rax_lock);
const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg|
self.register_manager.lockReg(reg)
else
null;
defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg);
const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg|
self.register_manager.lockReg(reg)
else
null;
defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock);
const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp);
const addr_reg = regs[0];
const index_reg = regs[1].to64();
const addr_reg = try self.register_manager.allocReg(null, gp);
switch (dst_ptr) {
.memory,
.got_load,
@@ -6173,17 +6151,15 @@ fn genInlineMemset(
return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr});
},
}
const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
defer self.register_manager.unlockReg(addr_reg_lock);
try self.genSetReg(Type.usize, .rax, len);
try self.genBinOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 });
try self.genSetReg(Type.usize, index_reg, len);
try self.genBinOpMir(.sub, Type.usize, .{ .register = index_reg }, .{ .immediate = 1 });
// loop:
// cmp rax, -1
// cmp index_reg, -1
const loop_start = try self.addInst(.{
.tag = .cmp,
.ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }),
.ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }),
.data = .{ .imm = @bitCast(u32, @as(i32, -1)) },
});
@@ -6202,24 +6178,20 @@ fn genInlineMemset(
if (x > math.maxInt(i32)) {
return self.fail("TODO inline memset for value immediate larger than 32bits", .{});
}
// mov byte ptr [rbp + rax + stack_offset], imm
const payload = try self.addExtra(Mir.ImmPair{
.dest_off = 0,
.operand = @truncate(u32, x),
});
// mov byte ptr [rbp + index_reg + stack_offset], imm
_ = try self.addInst(.{
.tag = .mov_mem_index_imm,
.ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg }),
.data = .{ .payload = payload },
.data = .{ .payload = try self.addExtra(Mir.IndexRegisterDispImm.encode(index_reg, 0, @truncate(u32, x))) },
});
},
else => return self.fail("TODO inline memset for value of type {}", .{value}),
}
// sub rax, 1
// sub index_reg, 1
_ = try self.addInst(.{
.tag = .sub,
.ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }),
.ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }),
.data = .{ .imm = 1 },
});


@@ -615,14 +615,15 @@ inline fn immOpSize(u_imm: u32) u6 {
fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
const ops = emit.mir.instructions.items(.ops)[inst].decode();
const scale = ops.flags;
const imm = emit.mir.instructions.items(.data)[inst].imm;
// OP reg1, [reg2 + scale*rcx + imm32]
const payload = emit.mir.instructions.items(.data)[inst].payload;
const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode();
// OP reg1, [reg2 + scale*index + imm32]
const scale_index = ScaleIndex{
.scale = scale,
.index = .rcx,
.index = index_reg_disp.index,
};
return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{
.disp = imm,
.disp = index_reg_disp.disp,
.base = ops.reg2,
.scale_index = scale_index,
}), emit.code);
@@ -631,22 +632,16 @@ fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void
fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
const ops = emit.mir.instructions.items(.ops)[inst].decode();
const scale = ops.flags;
const imm = emit.mir.instructions.items(.data)[inst].imm;
const payload = emit.mir.instructions.items(.data)[inst].payload;
const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode();
const scale_index = ScaleIndex{
.scale = scale,
.index = .rax,
.index = index_reg_disp.index,
};
if (ops.reg2 == .none) {
// OP qword ptr [reg1 + scale*rax + 0], imm32
return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{
.disp = 0,
.base = ops.reg1,
.scale_index = scale_index,
}), imm, emit.code);
}
// OP [reg1 + scale*rax + imm32], reg2
assert(ops.reg2 != .none);
// OP [reg1 + scale*index + imm32], reg2
return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{
.disp = imm,
.disp = index_reg_disp.disp,
.base = ops.reg1,
.scale_index = scale_index,
}), ops.reg2, emit.code);
@@ -656,24 +651,24 @@ fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void
const ops = emit.mir.instructions.items(.ops)[inst].decode();
const scale = ops.flags;
const payload = emit.mir.instructions.items(.data)[inst].payload;
const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode();
const scale_index = ScaleIndex{
.scale = scale,
.index = .rax,
.index = index_reg_disp_imm.index,
};
// OP qword ptr [reg1 + scale*rax + imm32], imm32
// OP qword ptr [reg1 + scale*index + imm32], imm32
return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{
.disp = imm_pair.dest_off,
.disp = index_reg_disp_imm.disp,
.base = ops.reg1,
.scale_index = scale_index,
}), imm_pair.operand, emit.code);
}), index_reg_disp_imm.imm, emit.code);
}
fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
const ops = emit.mir.instructions.items(.ops)[inst].decode();
assert(ops.reg2 == .none);
const payload = emit.mir.instructions.items(.data)[inst].payload;
const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode();
const ptr_size: Memory.PtrSize = switch (ops.flags) {
0b00 => .byte_ptr,
0b01 => .word_ptr,
@@ -682,14 +677,14 @@ fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!v
};
const scale_index = ScaleIndex{
.scale = 0,
.index = .rax,
.index = index_reg_disp_imm.index,
};
// OP ptr [reg1 + rax*1 + imm32], imm32
// OP ptr [reg1 + index + imm32], imm32
return lowerToMiEnc(tag, RegisterOrMemory.mem(ptr_size, .{
.disp = imm_pair.dest_off,
.disp = index_reg_disp_imm.disp,
.base = ops.reg1,
.scale_index = scale_index,
}), imm_pair.operand, emit.code);
}), index_reg_disp_imm.imm, emit.code);
}
fn mirMovSignExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
@@ -957,18 +952,19 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
mem.writeIntLittle(i32, emit.code.items[end_offset - 4 ..][0..4], disp);
},
0b10 => {
// lea reg, [rbp + rcx + imm32]
const imm = emit.mir.instructions.items(.data)[inst].imm;
// lea reg, [rbp + index + imm32]
const payload = emit.mir.instructions.items(.data)[inst].payload;
const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode();
const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null;
const scale_index = ScaleIndex{
.scale = 0,
.index = .rcx,
.index = index_reg_disp.index,
};
return lowerToRmEnc(
.lea,
ops.reg1,
RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{
.disp = imm,
.disp = index_reg_disp.disp,
.base = src_reg,
.scale_index = scale_index,
}),
@@ -2255,6 +2251,7 @@ fn lowerToMxEnc(tag: Tag, reg_or_mem: RegisterOrMemory, enc: Encoding, code: *st
encoder.rex(.{
.w = wide,
.b = base.isExtended(),
.x = if (mem_op.scale_index) |si| si.index.isExtended() else false,
});
}
opc.encode(encoder);
@@ -2360,10 +2357,12 @@ fn lowerToMiXEnc(
encoder.rex(.{
.w = dst_mem.ptr_size == .qword_ptr,
.b = base.isExtended(),
.x = if (dst_mem.scale_index) |si| si.index.isExtended() else false,
});
} else {
encoder.rex(.{
.w = dst_mem.ptr_size == .qword_ptr,
.x = if (dst_mem.scale_index) |si| si.index.isExtended() else false,
});
}
opc.encode(encoder);
@@ -2415,11 +2414,13 @@ fn lowerToRmEnc(
.w = setRexWRegister(reg),
.r = reg.isExtended(),
.b = base.isExtended(),
.x = if (src_mem.scale_index) |si| si.index.isExtended() else false,
});
} else {
encoder.rex(.{
.w = setRexWRegister(reg),
.r = reg.isExtended(),
.x = if (src_mem.scale_index) |si| si.index.isExtended() else false,
});
}
opc.encode(encoder);
@@ -2460,11 +2461,13 @@ fn lowerToMrEnc(
.w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg),
.r = reg.isExtended(),
.b = base.isExtended(),
.x = if (dst_mem.scale_index) |si| si.index.isExtended() else false,
});
} else {
encoder.rex(.{
.w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg),
.r = reg.isExtended(),
.x = if (dst_mem.scale_index) |si| si.index.isExtended() else false,
});
}
opc.encode(encoder);
@@ -2504,11 +2507,13 @@ fn lowerToRmiEnc(
.w = setRexWRegister(reg),
.r = reg.isExtended(),
.b = base.isExtended(),
.x = if (src_mem.scale_index) |si| si.index.isExtended() else false,
});
} else {
encoder.rex(.{
.w = setRexWRegister(reg),
.r = reg.isExtended(),
.x = if (src_mem.scale_index) |si| si.index.isExtended() else false,
});
}
opc.encode(encoder);
@@ -2545,10 +2550,12 @@ fn lowerToVmEnc(
vex.rex(.{
.r = reg.isExtended(),
.b = base.isExtended(),
.x = if (src_mem.scale_index) |si| si.index.isExtended() else false,
});
} else {
vex.rex(.{
.r = reg.isExtended(),
.x = if (src_mem.scale_index) |si| si.index.isExtended() else false,
});
}
encoder.vex(enc.prefix);
@@ -2585,10 +2592,12 @@ fn lowerToMvEnc(
vex.rex(.{
.r = reg.isExtended(),
.b = base.isExtended(),
.x = if (dst_mem.scale_index) |si| si.index.isExtended() else false,
});
} else {
vex.rex(.{
.r = reg.isExtended(),
.x = if (dst_mem.scale_index) |si| si.index.isExtended() else false,
});
}
encoder.vex(enc.prefix);


@@ -44,25 +44,28 @@ pub const Inst = struct {
/// 0b01 word ptr [reg1 + imm32], imm16
/// 0b10 dword ptr [reg1 + imm32], imm32
/// 0b11 qword ptr [reg1 + imm32], imm32 (sign-extended to imm64)
/// Notes:
/// * Uses `ImmPair` as payload
adc_mem_imm,
/// form: reg1, [reg2 + scale*rcx + imm32]
/// ops flags scale
/// 0b00 1
/// 0b01 2
/// 0b10 4
/// 0b11 8
adc_scale_src,
/// form: [reg1 + scale*rax + imm32], reg2
/// form: [reg1 + scale*rax + 0], imm32
/// form: reg1, [reg2 + scale*index + imm32]
/// ops flags scale
/// 0b00 1
/// 0b01 2
/// 0b10 4
/// 0b11 8
/// Notes:
/// * If reg2 is `none` then it means Data field `imm` is used as the immediate.
/// * Uses `IndexRegisterDisp` as payload
adc_scale_src,
/// form: [reg1 + scale*index + imm32], reg2
/// ops flags scale
/// 0b00 1
/// 0b01 2
/// 0b10 4
/// 0b11 8
/// Notes:
/// * Uses `IndexRegisterDisp` payload.
adc_scale_dst,
/// form: [reg1 + scale*rax + imm32], imm32
@@ -72,14 +75,16 @@ pub const Inst = struct {
/// 0b10 4
/// 0b11 8
/// Notes:
/// * Data field `payload` points at `ImmPair`.
/// * Uses `IndexRegisterDispImm` payload.
adc_scale_imm,
/// ops flags: form:
/// 0b00 byte ptr [reg1 + rax + imm32], imm8
/// 0b01 word ptr [reg1 + rax + imm32], imm16
/// 0b10 dword ptr [reg1 + rax + imm32], imm32
/// 0b11 qword ptr [reg1 + rax + imm32], imm32 (sign-extended to imm64)
/// 0b00 byte ptr [reg1 + index + imm32], imm8
/// 0b01 word ptr [reg1 + index + imm32], imm16
/// 0b10 dword ptr [reg1 + index + imm32], imm32
/// 0b11 qword ptr [reg1 + index + imm32], imm32 (sign-extended to imm64)
/// Notes:
/// * Uses `IndexRegisterDispImm` payload.
adc_mem_index_imm,
// The following instructions all have the same encoding as `adc`.
@@ -174,7 +179,9 @@ pub const Inst = struct {
/// 0b00 reg1, [reg2 + imm32]
/// 0b00 reg1, [ds:imm32]
/// 0b01 reg1, [rip + imm32]
/// 0b10 reg1, [reg2 + rcx + imm32]
/// 0b10 reg1, [reg2 + index + imm32]
/// Notes:
/// * 0b10 uses `IndexRegisterDisp` payload
lea,
/// ops flags: form:
@@ -461,6 +468,66 @@ pub const Inst = struct {
}
};
pub const IndexRegisterDisp = struct {
/// Index register to use with SIB-based encoding
index: u32,
/// Displacement value
disp: u32,
pub fn encode(index: Register, disp: u32) IndexRegisterDisp {
return .{
.index = @enumToInt(index),
.disp = disp,
};
}
pub fn decode(this: IndexRegisterDisp) struct {
index: Register,
disp: u32,
} {
return .{
.index = @intToEnum(Register, this.index),
.disp = this.disp,
};
}
};
/// TODO: would it be worth making `IndexRegisterDisp` and `IndexRegisterDispImm` a variable length list
/// instead of having two structs, one a superset of the other one?
pub const IndexRegisterDispImm = struct {
/// Index register to use with SIB-based encoding
index: u32,
/// Displacement value
disp: u32,
/// Immediate
imm: u32,
pub fn encode(index: Register, disp: u32, imm: u32) IndexRegisterDispImm {
return .{
.index = @enumToInt(index),
.disp = disp,
.imm = imm,
};
}
pub fn decode(this: IndexRegisterDispImm) struct {
index: Register,
disp: u32,
imm: u32,
} {
return .{
.index = @intToEnum(Register, this.index),
.disp = this.disp,
.imm = this.imm,
};
}
};
/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers
/// in a compact manner.
pub const RegisterList = struct {
bitset: BitSet = BitSet.initEmpty(),
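
As a usage illustration (hypothetical values, not taken from this diff), the new payload structs round-trip a typed index register, displacement, and immediate through the MIR extra array as raw u32s:

    // Pack at codegen time; `addExtra` stores these fields as plain u32s.
    const payload = IndexRegisterDispImm.encode(.r12, 8, 0xff);
    // Unpack at emit time; the index comes back as a typed Register.
    const parts = payload.decode();
    assert(parts.index == .r12);
    assert(parts.disp == 8 and parts.imm == 0xff);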