From f278038923c33ddf710f0328a07ce18613a94130 Mon Sep 17 00:00:00 2001
From: David Rubin
Date: Sat, 30 Nov 2024 16:08:26 -0800
Subject: [PATCH] replace `compiler_rt` memcpy with a better version

---
Reviewer notes. Text between the "---" line above and the first "diff --git"
line is commentary: it is not part of the commit, and the hunks below are
unchanged. Line breaks and indentation were reconstructed (zig fmt layout)
from a whitespace-collapsed copy, so check the context lines against the
tree before applying.

Summary
  * lib/compiler_rt/memcpy.zig: `memcpy` is rewritten and is additionally
    exported under the name "memmove". The `noalias` qualifiers and the
    `@setRuntimeSafety(false)` call of the old body are not carried over.
  * lib/compiler_rt/memmove.zig is deleted and its import is dropped from
    lib/compiler_rt.zig.
  * NOTE(review): the deleted file exported "memmove" with no guard of its
    own, while the new export sits inside `builtin.object_format != .c`.
    Confirm nothing depended on the unguarded export.
  * NOTE(review): runtime safety is no longer switched off in this file.
    Confirm that is intended for every mode compiler_rt is built in.

memcpy(maybe_dest, maybe_src, len), returns maybe_dest
  Returns `maybe_dest` untouched when len == 0 (neither pointer is
  unwrapped). Otherwise unwraps both pointers with `.?` and dispatches on
  len:
      1          single byte store
      2..3       blockCopy, block_size 2
      4..7       blockCopy, block_size 4
      8..16      blockCopy, block_size 8
      17..32     blockCopy, block_size 16
      33..256    copyLong
      over 256   copyMove

blockCopy(dest, src, block_size, len)
  Loads the first and the last `block_size` bytes of src, then stores them
  at the same offsets in dest. Both loads happen before either store. The
  two blocks cover all `len` bytes only when
  block_size <= len <= 2 * block_size, which every call site in memcpy
  satisfies.

copyLong(dest, src, len)
  For N = 64, 128, 192, 256 in turn: loads one more 32-byte vector from the
  front of src and one more from the back, and at the first N with
  len <= N stores everything loaded so far (fronts at j * 32, backs at
  len - (j * 32 + 32)). All loads precede the first store. Nothing is
  stored when len > 256; the visible callers pass 33..256 (memcpy) or a
  remainder of 33..127 (copyForward).

copyMove(dest, src, len)
  Calls overlapBwd when src < dest and src + len > dest; calls copyForward
  in the two other cases.

copyForward(dest, src, len)
  Reads the last 32 bytes of src up front, copies `len & ~127` bytes in
  ascending 128-byte steps (four 32-byte load/store pairs per step), then
  finishes with the saved tail when at most 32 bytes remain, or with
  copyLong on the remainder otherwise.

overlapBwd(dest, src, len)
  Reads the last 32 and the first 128 bytes of src up front. `end` is
  (dest + len - 32) & 31, so d = dest + len - end is a multiple of 32; the
  `align(32)` stores rely on that, while the loads stay `align(1)`. The
  loop walks downwards 128 bytes at a time, doing its four loads before
  its four stores, for as long as s is more than 128 bytes above src. The
  saved head and tail vectors are stored last.

 lib/compiler_rt.zig         |   1 -
 lib/compiler_rt/memcpy.zig  | 145 ++++++++++++++++++++++++++++++++----
 lib/compiler_rt/memmove.zig |  25 -------
 3 files changed, 131 insertions(+), 40 deletions(-)
 delete mode 100644 lib/compiler_rt/memmove.zig

diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig
index 82aeb7f88e..1369e0a7e5 100644
--- a/lib/compiler_rt.zig
+++ b/lib/compiler_rt.zig
@@ -233,7 +233,6 @@ comptime {
 
         _ = @import("compiler_rt/memcpy.zig");
         _ = @import("compiler_rt/memset.zig");
-        _ = @import("compiler_rt/memmove.zig");
         _ = @import("compiler_rt/memcmp.zig");
         _ = @import("compiler_rt/bcmp.zig");
         _ = @import("compiler_rt/ssp.zig");
diff --git a/lib/compiler_rt/memcpy.zig b/lib/compiler_rt/memcpy.zig
index 4609f25576..2ef38927b0 100644
--- a/lib/compiler_rt/memcpy.zig
+++ b/lib/compiler_rt/memcpy.zig
@@ -5,24 +5,141 @@ const builtin = @import("builtin");
 comptime {
     if (builtin.object_format != .c) {
         @export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
+        @export(&memcpy, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
     }
 }
 
-pub fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
-    @setRuntimeSafety(false);
-
-    if (len != 0) {
-        var d = dest.?;
-        var s = src.?;
-        var n = len;
-        while (true) {
-            d[0] = s[0];
-            n -= 1;
-            if (n == 0) break;
-            d += 1;
-            s += 1;
-        }
+// a port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
+pub fn memcpy(maybe_dest: ?[*]u8, maybe_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
+    if (len == 0) {
+        @branchHint(.unlikely);
+        return maybe_dest;
     }
 
+    const dest = maybe_dest.?;
+    const src = maybe_src.?;
+
+    if (len < 8) {
+        @branchHint(.unlikely);
+        if (len == 1) {
+            @branchHint(.unlikely);
+            dest[0] = src[0];
+        } else if (len >= 4) {
+            @branchHint(.unlikely);
+            blockCopy(dest, src, 4, len);
+        } else {
+            blockCopy(dest, src, 2, len);
+        }
+        return dest;
+    }
+
+    if (len > 32) {
+        @branchHint(.unlikely);
+        if (len > 256) {
+            @branchHint(.unlikely);
+            copyMove(dest, src, len);
+            return dest;
+        }
+        copyLong(dest, src, len);
+        return dest;
+    }
+
+    if (len > 16) {
+        @branchHint(.unlikely);
+        blockCopy(dest, src, 16, len);
+        return dest;
+    }
+
+    blockCopy(dest, src, 8, len);
+
     return dest;
 }
+
+inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
+    const first = @as(*align(1) const @Vector(block_size, u8), src[0..block_size]).*;
+    const second = @as(*align(1) const @Vector(block_size, u8), src[len - block_size ..][0..block_size]).*;
+    dest[0..block_size].* = first;
+    dest[len - block_size ..][0..block_size].* = second;
+}
+
+inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
+    var array: [8]@Vector(32, u8) = undefined;
+
+    inline for (.{ 64, 128, 192, 256 }, 0..) |N, i| {
+        array[i * 2] = src[(N / 2) - 32 ..][0..32].*;
+        array[(i * 2) + 1] = src[len - N / 2 ..][0..32].*;
+
+        if (len <= N) {
+            @branchHint(.unlikely);
+            for (0..i + 1) |j| {
+                dest[j * 32 ..][0..32].* = array[j * 2];
+                dest[len - ((j * 32) + 32) ..][0..32].* = array[(j * 2) + 1];
+            }
+            return;
+        }
+    }
+}
+
+inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
+    if (@intFromPtr(src) >= @intFromPtr(dest)) {
+        @branchHint(.unlikely);
+        copyForward(dest, src, len);
+    } else if (@intFromPtr(src) + len > @intFromPtr(dest)) {
+        @branchHint(.unlikely);
+        overlapBwd(dest, src, len);
+    } else {
+        copyForward(dest, src, len);
+    }
+}
+
+inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
+    const tail: @Vector(32, u8) = src[len - 32 ..][0..32].*;
+
+    const N: usize = len & ~@as(usize, 127);
+    var i: usize = 0;
+
+    while (i < N) : (i += 128) {
+        dest[i..][0..32].* = src[i..][0..32].*;
+        dest[i + 32 ..][0..32].* = src[i + 32 ..][0..32].*;
+        dest[i + 64 ..][0..32].* = src[i + 64 ..][0..32].*;
+        dest[i + 96 ..][0..32].* = src[i + 96 ..][0..32].*;
+    }
+
+    if (len - i <= 32) {
+        @branchHint(.unlikely);
+        dest[len - 32 ..][0..32].* = tail;
+    } else {
+        copyLong(dest[i..], src[i..], len - i);
+    }
+}
+
+inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
+    var array: [5]@Vector(32, u8) = undefined;
+    array[0] = src[len - 32 ..][0..32].*;
+    inline for (1..5) |i| array[i] = src[(i - 1) << 5 ..][0..32].*;
+
+    const end: usize = (@intFromPtr(dest) + len - 32) & 31;
+    const range = len - end;
+    var s = src + range;
+    var d = dest + range;
+
+    while (@intFromPtr(s) > @intFromPtr(src + 128)) {
+        // zig fmt: off
+        const first = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 32)).*;
+        const second = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 64)).*;
+        const third = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 96)).*;
+        const fourth = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 128)).*;
+
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 32))).* = first;
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 64))).* = second;
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 96))).* = third;
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 128))).* = fourth;
+        // zig fmt: on
+
+        s -= 128;
+        d -= 128;
+    }
+
+    inline for (array[1..], 0..) |vec, i| dest[i * 32 ..][0..32].* = vec;
+    dest[len - 32 ..][0..32].* = array[0];
+}
diff --git a/lib/compiler_rt/memmove.zig b/lib/compiler_rt/memmove.zig
deleted file mode 100644
index 75f9e46d16..0000000000
--- a/lib/compiler_rt/memmove.zig
+++ /dev/null
@@ -1,25 +0,0 @@
-const std = @import("std");
-const common = @import("./common.zig");
-
-comptime {
-    @export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
-}
-
-pub fn memmove(dest: ?[*]u8, src: ?[*]const u8, n: usize) callconv(.C) ?[*]u8 {
-    @setRuntimeSafety(false);
-
-    if (@intFromPtr(dest) < @intFromPtr(src)) {
-        var index: usize = 0;
-        while (index != n) : (index += 1) {
-            dest.?[index] = src.?[index];
-        }
-    } else {
-        var index = n;
-        while (index != 0) {
-            index -= 1;
-            dest.?[index] = src.?[index];
-        }
-    }
-
-    return dest;
-}