diff --git a/src/Sema.zig b/src/Sema.zig
index 8b47f1877b..e05308b6c0 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -26953,9 +26953,11 @@ fn storePtrVal(
             defer sema.gpa.free(buffer);
             reinterpret.val_ptr.*.writeToMemory(mut_kit.ty, sema.mod, buffer) catch |err| switch (err) {
                 error.ReinterpretDeclRef => unreachable,
+                error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
             };
             operand_val.writeToMemory(operand_ty, sema.mod, buffer[reinterpret.byte_offset..]) catch |err| switch (err) {
                 error.ReinterpretDeclRef => unreachable,
+                error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
             };
 
             const arena = mut_kit.beginArena(sema.mod);
@@ -27905,6 +27907,7 @@ fn bitCastVal(
     defer sema.gpa.free(buffer);
     val.writeToMemory(old_ty, sema.mod, buffer) catch |err| switch (err) {
         error.ReinterpretDeclRef => return null,
+        error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
     };
     return try Value.readFromMemory(new_ty, sema.mod, buffer[buffer_offset..], sema.arena);
 }
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index a7a1d8f56f..d697a41988 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -8424,28 +8424,45 @@ pub const FuncGen = struct {
         const dest_slice = try self.resolveInst(bin_op.lhs);
         const ptr_ty = self.air.typeOf(bin_op.lhs);
         const elem_ty = self.air.typeOf(bin_op.rhs);
-        const target = self.dg.module.getTarget();
-        const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
+        const module = self.dg.module;
+        const target = module.getTarget();
         const dest_ptr_align = ptr_ty.ptrAlignment(target);
         const u8_llvm_ty = self.context.intType(8);
         const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty);
         const is_volatile = ptr_ty.isVolatilePtr();
 
-        if (val_is_undef) {
-            // Even if safety is disabled, we still emit a memset to undefined since it conveys
-            // extra information to LLVM. However, safety makes the difference between using
-            // 0xaa or actual undefined for the fill byte.
-            const fill_byte = if (safety)
-                u8_llvm_ty.constInt(0xaa, .False)
-            else
-                u8_llvm_ty.getUndef();
-            const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
-            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
+        if (self.air.value(bin_op.rhs)) |elem_val| {
+            if (elem_val.isUndefDeep()) {
+                // Even if safety is disabled, we still emit a memset to undefined since it conveys
+                // extra information to LLVM. However, safety makes the difference between using
+                // 0xaa or actual undefined for the fill byte.
+                const fill_byte = if (safety)
+                    u8_llvm_ty.constInt(0xaa, .False)
+                else
+                    u8_llvm_ty.getUndef();
+                const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+                _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
 
-            if (safety and self.dg.module.comp.bin_file.options.valgrind) {
-                self.valgrindMarkUndef(dest_ptr, len);
+                if (safety and module.comp.bin_file.options.valgrind) {
+                    self.valgrindMarkUndef(dest_ptr, len);
+                }
+                return null;
+            }
+
+            // Test if the element value is compile-time known to be a
+            // repeating byte pattern, for example, `@as(u64, 0)` has a
+            // repeating byte pattern of 0 bytes. In such case, the memset
+            // intrinsic can be used.
+            var value_buffer: Value.Payload.U64 = undefined;
+            if (try elem_val.hasRepeatedByteRepr(elem_ty, module, &value_buffer)) |byte_val| {
+                const fill_byte = try self.resolveValue(.{
+                    .ty = Type.u8,
+                    .val = byte_val,
+                });
+                const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+                _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
+                return null;
             }
-            return null;
         }
 
         const value = try self.resolveInst(bin_op.rhs);
diff --git a/src/value.zig b/src/value.zig
index 05e9d24ee2..2b9636f5e9 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1278,7 +1278,10 @@ pub const Value = extern union {
     ///
     /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
     /// the end of the value in memory.
-    pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) error{ReinterpretDeclRef}!void {
+    pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) error{
+        ReinterpretDeclRef,
+        IllDefinedMemoryLayout,
+    }!void {
         const target = mod.getTarget();
         const endian = target.cpu.arch.endian();
         if (val.isUndef()) {
@@ -1345,7 +1348,7 @@ pub const Value = extern union {
                 return writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0);
             },
             .Struct => switch (ty.containerLayout()) {
-                .Auto => unreachable, // Sema is supposed to have emitted a compile error already
+                .Auto => return error.IllDefinedMemoryLayout,
                 .Extern => {
                     const fields = ty.structFields().values();
                     const field_vals = val.castTag(.aggregate).?.data;
@@ -1366,7 +1369,7 @@ pub const Value = extern union {
                 std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], @intCast(Int, int), endian);
             },
             .Union => switch (ty.containerLayout()) {
-                .Auto => unreachable,
+                .Auto => return error.IllDefinedMemoryLayout,
                 .Extern => @panic("TODO implement writeToMemory for extern unions"),
                 .Packed => {
                     const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
@@ -5381,6 +5384,41 @@ pub const Value = extern union {
         }
     }
 
+    /// If the value is represented in-memory as a series of bytes that all
+    /// have the same value, return that byte value, otherwise null.
+    ///
+    /// The returned value is backed by `value_buffer`, which must outlive it.
+    /// Returns null when the representation cannot be determined at compile
+    /// time or does not fit in host memory. Asserts the ABI size is nonzero.
+    pub fn hasRepeatedByteRepr(val: Value, ty: Type, mod: *Module, value_buffer: *Payload.U64) !?Value {
+        const target = mod.getTarget();
+        // The ABI size is a property of the target and may exceed the host's
+        // `usize`; give up rather than truncate.
+        const abi_size = std.math.cast(usize, ty.abiSize(target)) orelse return null;
+        assert(abi_size >= 1);
+        const byte_buffer = try mod.gpa.alloc(u8, abi_size);
+        defer mod.gpa.free(byte_buffer);
+
+        writeToMemory(val, ty, mod, byte_buffer) catch |err| switch (err) {
+            error.ReinterpretDeclRef => return null,
+            // TODO: The writeToMemory function was originally created for the purpose
+            // of comptime pointer casting. However, it is now additionally being used
+            // for checking the actual memory layout that will be generated by machine
+            // code late in compilation. So, this error handling is too aggressive and
+            // causes some false negatives, causing less-than-ideal code generation.
+            error.IllDefinedMemoryLayout => return null,
+        };
+        const first_byte = byte_buffer[0];
+        for (byte_buffer[1..]) |byte| {
+            if (byte != first_byte) return null;
+        }
+        value_buffer.* = .{
+            .base = .{ .tag = .int_u64 },
+            .data = first_byte,
+        };
+        return initPayload(&value_buffer.base);
+    }
+
     /// This type is not copyable since it may contain pointers to its inner data.
     pub const Payload = struct {
         tag: Tag,
diff --git a/test/behavior/memset.zig b/test/behavior/memset.zig
index 69add499f9..374fd4b6f5 100644
--- a/test/behavior/memset.zig
+++ b/test/behavior/memset.zig
@@ -94,7 +94,7 @@ test "memset with 1-byte array element" {
     try expect(buf[4][0]);
 }
 
-test "memset with large array element" {
+test "memset with large array element, runtime known" {
     const A = [128]u64;
     var buf: [5]A = undefined;
     var runtime_known_element = [_]u64{0} ** 128;
@@ -106,6 +106,18 @@ test "memset with large array element" {
     for (buf[4]) |elem| try expect(elem == 0);
 }
 
+test "memset with large array element, comptime known" {
+    const A = [128]u64;
+    var buf: [5]A = undefined;
+    const comptime_known_element = [_]u64{0} ** 128;
+    @memset(&buf, comptime_known_element);
+    for (buf[0]) |elem| try expect(elem == 0);
+    for (buf[1]) |elem| try expect(elem == 0);
+    for (buf[2]) |elem| try expect(elem == 0);
+    for (buf[3]) |elem| try expect(elem == 0);
+    for (buf[4]) |elem| try expect(elem == 0);
+}
+
 test "memcpy and memset intrinsics" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;