x86_64: rewrite array access

This commit is contained in:
Jacob Young 2025-02-14 02:20:43 -05:00
parent 9f87aacaaf
commit 4ea18c22f9
3 changed files with 392 additions and 106 deletions

View File

@ -2418,7 +2418,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
@setEvalBranchQuota(12_300);
@setEvalBranchQuota(12_400);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@ -2486,8 +2486,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.reduce_optimized => try cg.airReduce(inst),
.aggregate_init => try cg.airAggregateInit(inst),
.prefetch => try cg.airPrefetch(inst),
.array_elem_val => try cg.airArrayElemVal(inst),
// zig fmt: on
.arg => if (cg.debug_output != .none) {
@ -15150,7 +15148,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .dst0p, .leaa(.src0, .add_src0_elem_size_times_src1), ._, ._ },
.{ ._, ._, .lea, .dst0p, .leaa(.src0, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .elem_size_is = 1 }, .any },
@ -15264,7 +15262,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .dst0p, .leaa(.src0, .sub_src0_elem_size_times_src1), ._, ._ },
.{ ._, ._, .lea, .dst0p, .leaa(.src0, .sub_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .elem_size_is = 1 }, .any },
@ -52951,6 +52949,200 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
try ops[0].toOffset(0, cg);
try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg);
},
.array_elem_val => if (use_old) try cg.airArrayElemVal(inst) else {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
const array_ty = cg.typeOf(bin_op.lhs);
const res_ty = array_ty.elemType2(zcu);
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined;
cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{
.src_constraints = .{ .{ .bool_vec = .dword }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .imm32, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .src0d, .ua(.none, .add_src1_rem_32), ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .bool_vec = .dword }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .src0d, .src1d, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .bool_vec = .qword }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .imm32, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .src0q, .ua(.none, .add_src1_rem_64), ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .bool_vec = .qword }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .src0q, .src1q, ._, ._ },
} },
}, .{
.src_constraints = .{ .any_bool_vec, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .imm32, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .mema(.src0d, .add_src1_div_8_down_4), .ua(.none, .add_src1_rem_32), ._, ._ },
} },
}, .{
.src_constraints = .{ .any_bool_vec, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .to_gpr, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .src0d, .src1d, ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .simm32, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .dst0d, .mema(.src0b, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .to_gpr, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .dst0d, .memi(.src0b, .src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .word }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .simm32, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .dst0d, .mema(.src0w, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .word }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .to_gpr, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .dst0d, .memsi(.src0w, .@"2", .src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .dword }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .simm32, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .mov, .dst0d, .mema(.src0d, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .dword }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .to_gpr, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .mov, .dst0d, .memsi(.src0d, .@"4", .src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .qword }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .simm32, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .mov, .dst0q, .mema(.src0q, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .to_gpr, .none } },
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .mov, .dst0q, .memsi(.src0q, .@"8", .src1), ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => {
const elem_size = res_ty.abiSize(zcu);
const base = try cg.tempAllocReg(.usize, abi.RegisterClass.gp);
while (try ops[0].toBase(false, cg) or
try ops[1].toRegClass(true, .general_purpose, cg))
{}
const base_reg = base.tracking(cg).short.register.to64();
const rhs_reg = ops[1].tracking(cg).short.register.to64();
if (!std.math.isPowerOfTwo(elem_size)) {
try cg.spillEflagsIfOccupied();
try cg.asmRegisterRegisterImmediate(
.{ .i_, .mul },
rhs_reg,
rhs_reg,
.u(elem_size),
);
try cg.asmRegisterMemory(
.{ ._, .lea },
base_reg,
try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
);
} else if (elem_size > 8) {
try cg.spillEflagsIfOccupied();
try cg.asmRegisterImmediate(
.{ ._l, .sh },
rhs_reg,
.u(std.math.log2_int(u64, elem_size)),
);
try cg.asmRegisterMemory(
.{ ._, .lea },
base_reg,
try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
);
} else try cg.asmRegisterMemory(
.{ ._, .lea },
base_reg,
try ops[0].tracking(cg).short.mem(cg, .{
.index = rhs_reg,
.scale = .fromFactor(@intCast(elem_size)),
}),
);
// Hack around Sema insanity: lhs could be an arbitrarily large comptime-known array
// which could easily get spilled by the upcoming `load`, which would infinite recurse
// since spilling an array requires the same operation that triggered the spill.
try ops[0].die(cg);
ops[0] = base;
res[0] = try ops[0].load(res_ty, .{}, cg);
},
else => |e| return e,
};
try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
},
.slice_elem_val, .ptr_elem_val => |air_tag| if (use_old) switch (air_tag) {
else => unreachable,
.slice_elem_val => try cg.airSliceElemVal(inst),
@ -52968,7 +53160,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .dst0d, .leaa(.src0b, .add_src0_elem_size_times_src1), ._, ._ },
.{ ._, ._, .movzx, .dst0d, .leaa(.src0b, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .byte }, .any },
@ -52986,7 +53178,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .dst0d, .leaa(.src0w, .add_src0_elem_size_times_src1), ._, ._ },
.{ ._, ._, .movzx, .dst0d, .leaa(.src0w, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .word }, .any },
@ -53004,7 +53196,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .mov, .dst0d, .leaa(.src0d, .add_src0_elem_size_times_src1), ._, ._ },
.{ ._, ._, .mov, .dst0d, .leaa(.src0d, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.dst_constraints = .{ .{ .int = .dword }, .any },
@ -53022,7 +53214,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .mov, .dst0q, .leaa(.src0q, .add_src0_elem_size_times_src1), ._, ._ },
.{ ._, ._, .mov, .dst0q, .leaa(.src0q, .add_src0_elem_size_mul_src1), ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
@ -53040,8 +53232,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
while (true) for (&ops) |*op| {
if (try op.toRegClass(true, .general_purpose, cg)) break;
} else break;
const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
const lhs_reg = ops[0].tracking(cg).short.register.to64();
const rhs_reg = ops[1].tracking(cg).short.register.to64();
if (!std.math.isPowerOfTwo(elem_size)) {
try cg.spillEflagsIfOccupied();
try cg.asmRegisterRegisterImmediate(
@ -53052,7 +53244,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
);
try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
.mod = .{ .rm = .{ .index = rhs_reg } },
});
} else if (elem_size > 8) {
try cg.spillEflagsIfOccupied();
@ -53063,12 +53255,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
);
try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
.mod = .{ .rm = .{ .index = rhs_reg } },
});
} else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{
.size = .qword,
.index = rhs_reg,
.scale = .fromFactor(@intCast(elem_size)),
} },
@ -53095,8 +53286,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
while (true) for (&ops) |*op| {
if (try op.toRegClass(true, .general_purpose, cg)) break;
} else break;
const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
const lhs_reg = ops[0].tracking(cg).short.register.to64();
const rhs_reg = ops[1].tracking(cg).short.register.to64();
if (!std.math.isPowerOfTwo(elem_size)) {
try cg.spillEflagsIfOccupied();
try cg.asmRegisterRegisterImmediate(
@ -53107,7 +53298,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
);
try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
.mod = .{ .rm = .{ .index = rhs_reg } },
});
} else if (elem_size > 8) {
try cg.spillEflagsIfOccupied();
@ -53118,12 +53309,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
);
try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
.mod = .{ .rm = .{ .index = rhs_reg } },
});
} else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{
.size = .qword,
.index = rhs_reg,
.scale = .fromFactor(@intCast(elem_size)),
} },
@ -75183,7 +75373,7 @@ fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void {
registerAlias(dst_reg, dst_abi_size),
.{
.base = .{ .reg = src_reg },
.mod = .{ .rm = .{ .size = .qword, .disp = pl_off } },
.mod = .{ .rm = .{ .disp = pl_off } },
},
);
break :result .{ .register = dst_reg };
@ -75446,7 +75636,7 @@ fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
registerAlias(dst_reg, dst_abi_size),
.{
.base = .{ .reg = src_reg },
.mod = .{ .rm = .{ .size = .qword, .disp = 8 } },
.mod = .{ .rm = .{ .disp = 8 } },
},
);
@ -75700,7 +75890,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.asmRegisterMemory(
.{ ._, .lea },
addr_reg,
.{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
.{ .base = .{ .frame = frame_index } },
);
},
.load_frame => |frame_addr| try self.asmRegisterMemory(
@ -75708,7 +75898,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
addr_reg,
.{
.base = .{ .frame = frame_addr.index },
.mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } },
.mod = .{ .rm = .{ .disp = frame_addr.off } },
},
),
.memory,
@ -76717,7 +76907,6 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
.{
.base = .{ .reg = dst.to64() },
.mod = .{ .rm = .{
.size = .qword,
.index = tmp.to64(),
.scale = .@"4",
} },
@ -76744,7 +76933,6 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
.{
.base = .{ .reg = tmp.to64() },
.mod = .{ .rm = .{
.size = .qword,
.index = dst.to64(),
.scale = .@"2",
} },
@ -85591,7 +85779,6 @@ fn genSetReg(
dst_reg.to64(),
.{
.base = .{ .reloc = sym_off.sym_index },
.mod = .{ .rm = .{ .size = .qword } },
},
);
if (sym_off.off != 0) try self.asmRegisterMemory(
@ -85599,10 +85786,7 @@ fn genSetReg(
dst_reg.to64(),
.{
.base = .{ .reg = dst_reg.to64() },
.mod = .{ .rm = .{
.size = .qword,
.disp = sym_off.off,
} },
.mod = .{ .rm = .{ .disp = sym_off.off } },
},
);
},
@ -85816,18 +86000,12 @@ fn genSetMem(
const src_reg = registerAlias(reg_off.reg, abi_size);
try self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
.base = .{ .reg = src_reg },
.mod = .{ .rm = .{
.size = .qword,
.disp = reg_off.off,
} },
.mod = .{ .rm = .{ .disp = reg_off.off } },
});
try self.genSetMem(base, disp, ty, .{ .register = reg_off.reg }, opts);
return self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
.base = .{ .reg = src_reg },
.mod = .{ .rm = .{
.size = .qword,
.disp = -reg_off.off,
} },
.mod = .{ .rm = .{ .disp = -reg_off.off } },
});
},
else => |e| return e,
@ -87185,10 +87363,7 @@ fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void {
start_reg.to64(),
.{
.base = .{ .reg = addr_reg.to64() },
.mod = .{ .rm = .{
.size = .dword,
.index = start_reg.to64(),
} },
.mod = .{ .rm = .{ .index = start_reg.to64() } },
},
);
try self.asmRegisterMemory(
@ -87196,10 +87371,7 @@ fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void {
end_reg.to32(),
.{
.base = .{ .reg = end_reg.to64() },
.mod = .{ .rm = .{
.size = .byte,
.disp = -1,
} },
.mod = .{ .rm = .{ .disp = -1 } },
},
);
@ -89375,17 +89547,11 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{});
if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
.base = .{ .reg = addr_reg },
.mod = .{ .rm = .{
.size = .qword,
.index = offset_reg.to64(),
} },
.mod = .{ .rm = .{ .index = offset_reg.to64() } },
});
try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
.base = .{ .reg = offset_reg.to64() },
.mod = .{ .rm = .{
.size = .qword,
.disp = 8,
} },
.mod = .{ .rm = .{ .disp = 8 } },
});
try self.genCopy(.c_uint, gp_offset, .{ .register = offset_reg }, .{});
const done_reloc = try self.asmJmpReloc(undefined);
@ -89394,10 +89560,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{});
try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
.base = .{ .reg = addr_reg },
.mod = .{ .rm = .{
.size = .qword,
.disp = @intCast(@max(promote_ty.abiSize(zcu), 8)),
} },
.mod = .{ .rm = .{ .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)) } },
});
try self.genCopy(
ptr_anyopaque_ty,
@ -89423,17 +89586,11 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{});
if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
.base = .{ .reg = addr_reg },
.mod = .{ .rm = .{
.size = .qword,
.index = offset_reg.to64(),
} },
.mod = .{ .rm = .{ .index = offset_reg.to64() } },
});
try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
.base = .{ .reg = offset_reg.to64() },
.mod = .{ .rm = .{
.size = .qword,
.disp = 16,
} },
.mod = .{ .rm = .{ .disp = 16 } },
});
try self.genCopy(.c_uint, fp_offset, .{ .register = offset_reg }, .{});
const done_reloc = try self.asmJmpReloc(undefined);
@ -89442,10 +89599,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{});
try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
.base = .{ .reg = addr_reg },
.mod = .{ .rm = .{
.size = .qword,
.disp = @intCast(@max(promote_ty.abiSize(zcu), 8)),
} },
.mod = .{ .rm = .{ .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)) } },
});
try self.genCopy(
ptr_anyopaque_ty,
@ -90505,10 +90659,7 @@ const Temp = struct {
new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
.base = .{ .reg = reg.to64() },
.mod = .{ .rm = .{
.size = .qword,
.disp = off,
} },
.mod = .{ .rm = .{ .disp = off } },
});
},
.register_offset => |reg_off| {
@ -90517,10 +90668,7 @@ const Temp = struct {
new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
.base = .{ .reg = reg_off.reg.to64() },
.mod = .{ .rm = .{
.size = .qword,
.disp = reg_off.off + off,
} },
.mod = .{ .rm = .{ .disp = reg_off.off + off } },
});
},
.lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{
@ -90627,10 +90775,7 @@ const Temp = struct {
new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
.base = .{ .reg = reg_off.reg.to64() },
.mod = .{ .rm = .{
.size = .qword,
.disp = reg_off.off + @as(u31, limb_index) * 8,
} },
.mod = .{ .rm = .{ .disp = reg_off.off + @as(u31, limb_index) * 8 } },
});
},
.load_symbol => |sym_off| {
@ -93462,13 +93607,14 @@ const Select = struct {
elem_size,
src0_elem_size,
dst0_elem_size,
src0_elem_size_times_src1,
src0_elem_size_mul_src1,
src1,
log2_src0_elem_size,
smin,
smax,
umax,
},
op: enum(u2) { mul, div, rem_8_mul },
op: enum(u2) { mul, div, div_8_down, rem_8_mul },
rhs: Memory.Scale,
const none: Adjust = .{ .sign = .pos, .lhs = .none, .op = .mul, .rhs = .@"1" };
@ -93512,8 +93658,11 @@ const Select = struct {
const add_8_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"8" };
const add_src0_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .div, .rhs = .@"8" };
const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" };
const add_src0_elem_size_times_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" };
const sub_src0_elem_size_times_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" };
const add_src0_elem_size_mul_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
const sub_src0_elem_size_mul_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
const add_src1_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .div_8_down, .rhs = .@"4" };
const add_src1_rem_32: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"4" };
const add_src1_rem_64: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"8" };
const add_log2_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .log2_src0_elem_size, .op = .mul, .rhs = .@"1" };
const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
const add_elem_limbs: Adjust = .{ .sign = .pos, .lhs = .elem_limbs, .op = .mul, .rhs = .@"1" };
@ -94086,8 +94235,9 @@ const Select = struct {
.elem_size => @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
.src0_elem_size => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
.dst0_elem_size => @intCast(Select.Operand.Ref.dst0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
.src0_elem_size_times_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
.src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
Select.Operand.Ref.src1.valueOf(s).immediate),
.src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate),
.log2_src0_elem_size => @intCast(std.math.log2(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))),
.smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate(
-%op.base.ref.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
@ -94107,6 +94257,7 @@ const Select = struct {
break :op_res op_res[0];
},
.div => @shrExact(lhs, rhs),
.div_8_down => lhs >> 3 & @as(SignedImm, -1) << rhs,
.rem_8_mul => lhs & (@as(SignedImm, 1) << @intCast(@as(u3, 3) + rhs)) - 1,
};
return switch (op.flags.adjust.sign) {

View File

@ -431,7 +431,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
_ = lower.reloc(.{ .linker_tlsld = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{
.{ .reg = .rdi },
.{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) },
.{ .mem = Memory.initRip(.none, 0) },
}, lower.target);
lower.result_insts_len += 1;
_ = lower.reloc(.{
@ -443,7 +443,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
lower.result_insts_len += 1;
_ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0);
emit_mnemonic = .lea;
break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
break :op .{ .mem = Memory.initSib(.none, .{
.base = .{ .reg = .rax },
.disp = std.math.minInt(i32),
}) };
@ -456,7 +456,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
lower.result_insts_len += 1;
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
emit_mnemonic = .lea;
break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
break :op .{ .mem = Memory.initSib(.none, .{
.base = .{ .reg = .rax },
.disp = std.math.minInt(i32),
}) };
@ -465,10 +465,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
if (lower.pic) switch (mnemonic) {
.lea => {
if (elf_sym.flags.is_extern_ptr) emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
},
.lea => if (elf_sym.flags.is_extern_ptr) {
emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(.ptr, 0) };
} else break :op .{ .mem = Memory.initRip(.none, 0) },
.mov => {
if (elf_sym.flags.is_extern_ptr) {
const reg = ops[0].reg;
@ -505,7 +505,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = .rdi },
.{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) },
.{ .mem = Memory.initRip(.ptr, 0) },
}, lower.target);
lower.result_insts_len += 1;
lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
@ -518,10 +518,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
break :op switch (mnemonic) {
.lea => {
if (macho_sym.flags.is_extern_ptr) emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
},
.lea => if (macho_sym.flags.is_extern_ptr) {
emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(.ptr, 0) };
} else break :op .{ .mem = Memory.initRip(.none, 0) },
.mov => {
if (macho_sym.flags.is_extern_ptr) {
const reg = ops[0].reg;

View File

@ -1,4 +1,4 @@
fn access(comptime array: anytype) !void {
fn accessSlice(comptime array: anytype) !void {
var slice: []const @typeInfo(@TypeOf(array)).array.child = undefined;
slice = &array;
inline for (0.., &array) |ct_index, *elem| {
@ -20,18 +20,153 @@ fn access(comptime array: anytype) !void {
if (slice[rt_index] != elem.*) return error.Unexpected;
}
}
test access {
try access([3]u8{ 0xdb, 0xef, 0xbd });
try access([3]u16{ 0x340e, 0x3654, 0x88d7 });
try access([3]u32{ 0xd424c2c0, 0x2d6ac466, 0x5a0cfaba });
try access([3]u64{
test accessSlice {
    // Exercise runtime vs comptime indexing over comptime-known arrays of
    // each power-of-two integer width from u8 through u128.
    inline for (.{
        [3]u8{ 0xdb, 0xef, 0xbd },
        [3]u16{ 0x340e, 0x3654, 0x88d7 },
        [3]u32{ 0xd424c2c0, 0x2d6ac466, 0x5a0cfaba },
        [3]u64{
            0x9327a4f5221666a6,
            0x5c34d3ddd84a8b12,
            0xbae087f39f649260,
        },
        [3]u128{
            0x601cf010065444d4d42d5536dd9b95db,
            0xa03f592fcaa22d40af23a0c735531e3c,
            0x5da44907b31602b95c2d93f0b582ceab,
        },
    }) |array| try accessSlice(array);
}
/// Checks that indexing a vector with a runtime-known index observes the
/// same element — both by address and by value — as the equivalent
/// comptime-known index. Returns error.Unexpected on any mismatch.
fn accessVector(comptime init: anytype) !void {
    const Vec = @TypeOf(init);
    const lane_count = @typeInfo(Vec).vector.len;
    // Assign through a mutable local so the vector itself is runtime-known
    // rather than being folded away at comptime.
    var vec: Vec = undefined;
    vec = init;
    inline for (0..lane_count) |comptime_index| {
        // Launder the comptime index into a runtime-known usize the same way.
        var runtime_index: usize = undefined;
        runtime_index = comptime_index;
        if (&vec[runtime_index] != &vec[comptime_index]) return error.Unexpected;
        if (vec[runtime_index] != vec[comptime_index]) return error.Unexpected;
    }
}
test accessVector {
    // Covers bool vectors at and around the bit-width boundaries the x86_64
    // backend special-cases (1..9, 15..17, 31..33, 63..65 lanes span the
    // byte/word/dword/qword packed-mask sizes), then 8-lane integer vectors
    // from u8 up to u256 with arbitrary element values.
    try accessVector(@Vector(1, bool){
        false,
    });
    try accessVector(@Vector(2, bool){
        false, true,
    });
    try accessVector(@Vector(3, bool){
        true, true, false,
    });
    try accessVector(@Vector(5, bool){
        true, false, true, false, true,
    });
    try accessVector(@Vector(7, bool){
        true, false, true, true, true, false, true,
    });
    try accessVector(@Vector(8, bool){
        false, true, false, true, false, false, false, true,
    });
    try accessVector(@Vector(9, bool){
        true, true, false, true, false, false, false, false,
        true,
    });
    try accessVector(@Vector(15, bool){
        false, true, true, true, false, true, false, false,
        true, true, false, false, true, false, false,
    });
    try accessVector(@Vector(16, bool){
        true, true, false, true, false, false, false, false,
        false, true, true, false, false, false, true, true,
    });
    try accessVector(@Vector(17, bool){
        true, false, true, true, false, true, false, true,
        true, true, true, false, false, false, true, true,
        false,
    });
    try accessVector(@Vector(31, bool){
        true, false, true, true, false, true, true, true,
        false, true, false, true, false, true, true, true,
        false, false, true, false, false, false, false, true,
        true, true, true, false, false, false, false,
    });
    try accessVector(@Vector(32, bool){
        true, true, false, false, false, true, true, true,
        false, true, true, true, false, true, false, true,
        false, true, false, true, false, true, true, false,
        false, false, false, false, false, true, true, true,
    });
    try accessVector(@Vector(33, bool){
        true, false, false, false, false, true, true, true,
        false, false, true, false, true, true, false, true,
        true, true, false, true, true, false, false, false,
        false, true, false, false, false, true, true, false,
        false,
    });
    try accessVector(@Vector(63, bool){
        false, false, true, true, true, false, true, true,
        true, false, true, true, true, false, true, false,
        true, true, false, true, false, true, true, true,
        false, false, true, false, false, false, false, true,
        true, true, true, true, false, true, false, true,
        true, true, false, false, true, false, false, true,
        false, true, false, false, false, false, true, true,
        false, true, false, false, true, true, true,
    });
    try accessVector(@Vector(64, bool){
        false, false, true, true, true, false, true, true,
        true, false, true, true, false, true, true, false,
        false, false, false, false, true, true, false, true,
        true, true, true, true, false, false, false, true,
        true, false, true, true, false, false, true, false,
        false, true, true, false, true, true, false, false,
        true, true, false, true, false, true, true, true,
        false, true, true, false, false, false, false, false,
    });
    try accessVector(@Vector(65, bool){
        false, false, true, true, true, true, true, true,
        true, false, false, false, false, true, true, false,
        true, false, true, true, true, false, false, false,
        true, false, true, true, false, true, true, true,
        true, true, false, true, true, false, true, false,
        false, true, false, true, false, false, true, false,
        true, false, true, true, true, false, true, true,
        false, false, true, true, true, true, false, false,
        true,
    });
    try accessVector(@Vector(8, u8){
        0x60, 0xf7, 0xf4, 0xb0, 0x05, 0xd3, 0x06, 0x78,
    });
    try accessVector(@Vector(8, u16){
        0x9c91, 0xfb8b, 0x7f80, 0x8304, 0x6e52, 0xd8ef, 0x37fc, 0x7851,
    });
    try accessVector(@Vector(8, u32){
        0x688b88e2, 0x68e2b7a2, 0x87574680, 0xab4f0769,
        0x75472bb5, 0xa791f2ae, 0xeb2ed416, 0x5f05ce82,
    });
    try accessVector(@Vector(8, u64){
        0xdefd1ddffaedf818, 0x91c78a29d3d59890,
        0x842aaf8fd3c7b785, 0x970a07b8f9f4a6b3,
        0x21b2425d1a428246, 0xea50e41174a7977b,
        0x08d0f1c4f5978b74, 0x8dc88a7fd85e0e67,
    });
    try accessVector(@Vector(8, u128){
        0x6f2cbde1fb219b1e73d7f774d10f0d94,
        0x7c1412616cda20436d7106691d8ba4cc,
        0x4ee940b50e97675b3b35d7872a35b5ad,
        0x6d994fb8caa1b2fac48acbb68fa2d2f1,
        0xdee698c7ec8de9b5940903e3fc665b63,
        0x0751491a509e4a1ce8cfa6d62fe9e74c,
        0x3d880f0a927ce3bfc2682b72070fcd50,
        0x82f0eec62881598699eeb93fbb456e95,
    });
    try accessVector(@Vector(8, u256){
        0x6ee4f35fe624d365952f73960791238ac781bfba782abc7866a691063e43ce48,
        0xb006491f54a9c9292458a5835b7d5f4cfa18136f175eef0a13bb8adf5c3dc061,
        0xd6e25ca1bc5685fc52609e261b9065bc05a8662e9291660033dd7f6d98e562b3,
        0x992c5e54e0e6331dac258996be7dae9b2a2eff323a39043ba8d2721420dc5f5c,
        0x257313f45fb3556d0fc323d5f38c953e9a093fe2278655312b6a5b64aab9d901,
        0x6c8ad2182b9a3b2b19c2c9b152956b383d0fee2e3fbd5b02ed72227446a7b221,
        0xd80cafc2252b289793799675e43f97ba4a5448c7b57e1544a464687b435efc7b,
        0xfcb480f2d70afd53c4689dd3f5db7638c24302f2a6a15f738167db090d91fb28,
    });
}