x86_64 backend: implement @memset for element ABI size > 1

* make memset, memset_safe, and memcpy guarantee that if the length is
   comptime-known then it will be nonzero.
This commit is contained in:
Andrew Kelley 2023-04-23 20:52:43 -07:00
parent 7c56145a76
commit 881e931ee1
4 changed files with 74 additions and 25 deletions

View File

@ -641,6 +641,8 @@ pub const Inst = struct {
/// The element value may be undefined, in which case the destination
/// memory region has undefined bytes after this function executes. In
/// such case ignoring this instruction is legal lowering.
/// If the length is compile-time known (due to the destination being a
/// pointer-to-array), then it is guaranteed to be greater than zero.
memset,
/// Same as `memset`, except if the element value is undefined, the memory region
/// should be filled with 0xaa bytes, and any other safety metadata such as Valgrind
@ -654,6 +656,9 @@ pub const Inst = struct {
/// The two memory regions must not overlap.
/// Result type is always void.
/// Uses the `bin_op` field. LHS is the dest slice. RHS is the source pointer.
/// If the length is compile-time known (due to the destination or
/// source being a pointer-to-array), then it is guaranteed to be
/// greater than zero.
memcpy,
/// Uses the `ty_pl` field with payload `Cmpxchg`.

View File

@ -21918,8 +21918,6 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
} else break :rs src_src;
} else dest_src;
try sema.requireRuntimeBlock(block, src, runtime_src);
const dest_ty = sema.typeOf(dest_ptr);
const src_ty = sema.typeOf(src_ptr);
@ -21946,10 +21944,16 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
var new_src_ptr = src_ptr;
if (len_val) |val| {
const len = val.toUnsignedInt(target);
if (len == 0) {
// This AIR instruction guarantees length > 0 if it is comptime-known.
return;
}
new_dest_ptr = try upgradeToArrayPtr(sema, block, dest_ptr, len);
new_src_ptr = try upgradeToArrayPtr(sema, block, src_ptr, len);
}
try sema.requireRuntimeBlock(block, src, runtime_src);
// Aliasing safety check.
if (block.wantSafety()) {
const dest_int = try block.addUnOp(.ptrtoint, new_dest_ptr);
@ -21995,13 +21999,18 @@ fn zirMemset(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
const target = sema.mod.getTarget();
const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |ptr_val| rs: {
const len_air_ref = try sema.fieldVal(block, src, dest_ptr, "len", dest_src);
const len_val = (try sema.resolveDefinedValue(block, dest_src, len_air_ref)) orelse
break :rs dest_src;
const len_u64 = (try len_val.getUnsignedIntAdvanced(target, sema)).?;
const len = try sema.usizeCast(block, dest_src, len_u64);
if (len == 0) {
// This AIR instruction guarantees length > 0 if it is comptime-known.
return;
}
if (!ptr_val.isComptimeMutablePtr()) break :rs dest_src;
if (try sema.resolveMaybeUndefVal(uncoerced_elem)) |_| {
const len_air_ref = try sema.fieldVal(block, src, dest_ptr, "len", dest_src);
const len_val = (try sema.resolveDefinedValue(block, dest_src, len_air_ref)) orelse
break :rs dest_src;
const len_u64 = (try len_val.getUnsignedIntAdvanced(target, sema)).?;
const len = try sema.usizeCast(block, dest_src, len_u64);
for (0..len) |i| {
const elem_index = try sema.addIntUnsigned(Type.usize, i);
const elem_ptr = try sema.elemPtr(

View File

@ -8175,23 +8175,62 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
};
defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock);
if (elem_ty.abiSize(self.target.*) != 1) {
return self.fail("TODO implement airMemset when element ABI size > 1", .{});
if (elem_ty.abiSize(self.target.*) == 1) {
const len = switch (dst_ptr_ty.ptrSize()) {
// TODO: this only handles slices stored on the stack
.Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }),
.One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }),
.C, .Many => unreachable,
};
const len_lock: ?RegisterLock = switch (len) {
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
else => null,
};
defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
// TODO: dst_ptr could be a slice rather than raw pointer
try self.genInlineMemset(dst_ptr, src_val, len, .{});
return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
}
const len = switch (dst_ptr_ty.ptrSize()) {
.Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }),
.One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }),
.C, .Many => unreachable,
};
const len_lock: ?RegisterLock = switch (len) {
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
else => null,
};
defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
// Store the first element, and then rely on memcpy copying forwards.
// Length zero requires a runtime check - so we handle arrays specially
// here to elide it.
switch (dst_ptr_ty.ptrSize()) {
.Slice => {
// TODO: this only handles slices stored on the stack
const ptr = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 0 });
const len = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 });
_ = ptr;
_ = len;
return self.fail("TODO implement airMemset for x86_64 with ABI size > 1 using a slice", .{});
},
.One => {
const len = dst_ptr_ty.childType().arrayLen();
assert(len != 0); // prevented by Sema
try self.store(dst_ptr, src_val, dst_ptr_ty, elem_ty);
// TODO: dst_ptr could be a slice rather than raw pointer
try self.genInlineMemset(dst_ptr, src_val, len, .{});
const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
defer self.register_manager.unlockReg(second_elem_ptr_lock);
const elem_abi_size = @intCast(u31, elem_ty.abiSize(self.target.*));
try self.asmRegisterMemory(
.lea,
second_elem_ptr_reg,
Memory.sib(.qword, .{
.base = try self.copyToTmpRegister(Type.usize, dst_ptr),
.disp = elem_abi_size,
}),
);
const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) };
try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy, .{});
},
.C, .Many => unreachable,
}
return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
}

View File

@ -361,10 +361,6 @@ test "@memset on array pointers" {
// TODO: implement memset when element ABI size > 1
return error.SkipZigTest;
}
if (builtin.zig_backend == .stage2_x86_64) {
// TODO: implement memset when element ABI size > 1
return error.SkipZigTest;
}
try testMemsetArray();
try comptime testMemsetArray();