diff --git a/lib/std/crypto/ghash_polyval.zig b/lib/std/crypto/ghash_polyval.zig
index 6ccbf1f228..b89e4b1c59 100644
--- a/lib/std/crypto/ghash_polyval.zig
+++ b/lib/std/crypto/ghash_polyval.zig
@@ -158,11 +158,7 @@ fn Hash(comptime endian: std.builtin.Endian, comptime shift_key: bool) type {
         /// clmulSoft128_64 is faster on platforms with no native 128-bit registers.
         const clmulSoft = switch (builtin.cpu.arch) {
             .wasm32, .wasm64 => clmulSoft128_64,
-            else => impl: {
-                const vector_size = std.simd.suggestVectorSize(u128) orelse 0;
-                if (vector_size < 128) break :impl clmulSoft128_64;
-                break :impl clmulSoft128;
-            },
+            else => if (std.simd.suggestVectorLength(u128) != null) clmulSoft128 else clmulSoft128_64,
         };
 
         // Software carryless multiplication of two 64-bit integers using native 128-bit registers.
diff --git a/lib/std/http/protocol.zig b/lib/std/http/protocol.zig
index e757095803..227b376ed3 100644
--- a/lib/std/http/protocol.zig
+++ b/lib/std/http/protocol.zig
@@ -84,7 +84,7 @@ pub const HeadersParser = struct {
     /// If the amount returned is less than `bytes.len`, you may assume that the parser is in a content state and the
     /// first byte of content is located at `bytes[result]`.
     pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
-        const vector_len: comptime_int = @max(std.simd.suggestVectorSize(u8) orelse 1, 8);
+        const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
         const len: u32 = @intCast(bytes.len);
         var index: u32 = 0;
 
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 22e543ad8b..c46f7bd953 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -1032,15 +1032,16 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
             // The below branch assumes that reading past the end of the buffer is valid, as long
             // as we don't read into a new page. This should be the case for most architectures
             // which use paged memory, however should be confirmed before adding a new arch below.
-            .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorSize(T)) |block_len| {
-                comptime std.debug.assert(std.mem.page_size % block_len == 0);
+            .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorLength(T)) |block_len| {
                 const Block = @Vector(block_len, T);
                 const mask: Block = @splat(sentinel);
-
+                comptime std.debug.assert(std.mem.page_size % @sizeOf(Block) == 0);
+
+                // First block may be unaligned
                 const start_addr = @intFromPtr(&p[i]);
                 const offset_in_page = start_addr & (std.mem.page_size - 1);
 
-                if (offset_in_page < std.mem.page_size - block_len) {
+                if (offset_in_page <= std.mem.page_size - @sizeOf(Block)) {
                     // Will not read past the end of a page, full block.
                     const block: Block = p[i..][0..block_len].*;
                     const matches = block == mask;
@@ -1085,7 +1086,7 @@ test "indexOfSentinel vector paths" {
     const allocator = std.testing.allocator;
 
     inline for (Types) |T| {
-        const block_len = std.simd.suggestVectorSize(T) orelse continue;
+        const block_len = std.simd.suggestVectorLength(T) orelse continue;
 
         // Allocate three pages so we guarantee a page-crossing address with a full page after
         const memory = try allocator.alloc(T, 3 * std.mem.page_size / @sizeOf(T));
@@ -1176,11 +1177,11 @@ pub fn indexOfScalarPos(comptime T: type, slice: []const T, start_index: usize,
         !@inComptime() and
         (@typeInfo(T) == .Int or @typeInfo(T) == .Float) and std.math.isPowerOfTwo(@bitSizeOf(T)))
     {
-        if (std.simd.suggestVectorSize(T)) |block_len| {
+        if (std.simd.suggestVectorLength(T)) |block_len| {
             // For Intel Nehalem (2009) and AMD Bulldozer (2012) or later, unaligned loads on aligned data result
             // in the same execution as aligned loads. We ignore older arch's here and don't bother pre-aligning.
             //
-            // Use `std.simd.suggestVectorSize(T)` to get the same alignment as used in this function
+            // Use `std.simd.suggestVectorLength(T)` to get the same alignment as used in this function
             // however this usually isn't necessary unless your arch has a performance penalty due to this.
             //
             // This may differ for other arch's. Arm for example costs a cycle when loading across a cache
diff --git a/lib/std/simd.zig b/lib/std/simd.zig
index 72b71938cb..c3ae2d8dbe 100644
--- a/lib/std/simd.zig
+++ b/lib/std/simd.zig
@@ -6,7 +6,9 @@
 const std = @import("std");
 const builtin = @import("builtin");
 
-pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?comptime_int {
+pub const suggestVectorSizeForCpu = @compileError("deprecated; use 'suggestVectorLengthForCpu'");
+
+pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?comptime_int {
     // This is guesswork, if you have better suggestions can add it or edit the current here
     // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it
     const element_bit_size = @max(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(T)) catch unreachable);
@@ -53,24 +55,26 @@ pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?
     return @divExact(vector_bit_size, element_bit_size);
 }
 
-/// Suggests a target-dependant vector size for a given type, or null if scalars are recommended.
+pub const suggestVectorSize = @compileError("deprecated; use 'suggestVectorLength'");
+
+/// Suggests a target-dependant vector length for a given type, or null if scalars are recommended.
 /// Not yet implemented for every CPU architecture.
-pub fn suggestVectorSize(comptime T: type) ?comptime_int {
-    return suggestVectorSizeForCpu(T, builtin.cpu);
+pub fn suggestVectorLength(comptime T: type) ?comptime_int {
+    return suggestVectorLengthForCpu(T, builtin.cpu);
 }
 
-test "suggestVectorSizeForCpu works with signed and unsigned values" {
+test "suggestVectorLengthForCpu works with signed and unsigned values" {
     comptime var cpu = std.Target.Cpu.baseline(std.Target.Cpu.Arch.x86_64);
     comptime cpu.features.addFeature(@intFromEnum(std.Target.x86.Feature.avx512f));
     comptime cpu.features.populateDependencies(&std.Target.x86.all_features);
-    const expected_size: usize = switch (builtin.zig_backend) {
+    const expected_len: usize = switch (builtin.zig_backend) {
         .stage2_x86_64 => 8,
         else => 16,
     };
-    const signed_integer_size = suggestVectorSizeForCpu(i32, cpu).?;
-    const unsigned_integer_size = suggestVectorSizeForCpu(u32, cpu).?;
-    try std.testing.expectEqual(expected_size, unsigned_integer_size);
-    try std.testing.expectEqual(expected_size, signed_integer_size);
+    const signed_integer_len = suggestVectorLengthForCpu(i32, cpu).?;
+    const unsigned_integer_len = suggestVectorLengthForCpu(u32, cpu).?;
+    try std.testing.expectEqual(expected_len, unsigned_integer_len);
+    try std.testing.expectEqual(expected_len, signed_integer_len);
 }
 
 fn vectorLength(comptime VectorType: type) comptime_int {
@@ -232,7 +236,7 @@ test "vector patterns" {
     }
 }
 
-/// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the size of a and b.
+/// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the length of a and b.
 pub fn mergeShift(a: anytype, b: anytype, comptime shift: VectorCount(@TypeOf(a, b))) @TypeOf(a, b) {
     const len = vectorLength(@TypeOf(a, b));
 
@@ -240,7 +244,7 @@ pub fn mergeShift(a: anytype, b: anytype, comptime shift: VectorCount(@TypeOf(a,
 }
 
 /// Elements are shifted rightwards (towards higher indices). New elements are added to the left, and the rightmost elements are cut off
-/// so that the size of the vector stays the same.
+/// so that the length of the vector stays the same.
 pub fn shiftElementsRight(vec: anytype, comptime amount: VectorCount(@TypeOf(vec)), shift_in: std.meta.Child(@TypeOf(vec))) @TypeOf(vec) {
     // It may be possible to implement shifts and rotates with a runtime-friendly slice of two joined vectors, as the length of the
     // slice would be comptime-known. This would permit vector shifts and rotates by a non-comptime-known amount.
diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 07aa63cac8..eacb62613a 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -202,7 +202,7 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
 pub fn utf8ValidateSlice(input: []const u8) bool {
     var remaining = input;
 
-    const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+    const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
     const Chunk = @Vector(chunk_len, u8);
 
     // Fast path. Check for and skip ASCII characters at the start of the input.
@@ -758,7 +758,7 @@ pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8
     var remaining = utf16le;
 
     if (builtin.zig_backend != .stage2_x86_64) {
-        const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
         const Chunk = @Vector(chunk_len, u16);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -801,7 +801,7 @@ pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]
     var remaining = utf16le;
 
     if (builtin.zig_backend != .stage2_x86_64) {
-        const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
         const Chunk = @Vector(chunk_len, u16);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -842,7 +842,7 @@ pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {
     var remaining = utf16le;
 
     if (builtin.zig_backend != .stage2_x86_64) {
-        const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
         const Chunk = @Vector(chunk_len, u16);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -941,7 +941,7 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1
     var remaining = utf8;
     // Need support for std.simd.interlace
     if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
-        const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
         const Chunk = @Vector(chunk_len, u8);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -986,7 +986,7 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
     var remaining = utf8;
     // Need support for std.simd.interlace
     if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
-        const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
         const Chunk = @Vector(chunk_len, u8);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
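Beyond the rename, the lib/std/mem.zig hunk is also a unit fix: `block_len` counts elements of `T`, while page offsets are measured in bytes, so the divisibility assert and the bounds check now both use `@sizeOf(Block)`. A minimal sketch of that reasoning, using assumed example numbers (a 4096-byte page, an 8-element vector of u32) that are not taken from the patch:

const std = @import("std");

test "page-boundary bound must be in bytes, not elements" {
    const page_size = 4096; // assumed page size, for illustration only
    const block_len = 8; // element count, as suggestVectorLength reports it
    const Block = @Vector(block_len, u32);
    comptime std.debug.assert(@sizeOf(Block) == 32); // 8 elements * 4 bytes

    // A @sizeOf(Block)-byte load starting at byte offset `o` stays inside the
    // page exactly when o <= page_size - @sizeOf(Block), the patched check.
    const last_safe_offset = page_size - @sizeOf(Block); // 4064
    // The old bound subtracted an element count from a byte count: it accepted
    // offsets up to page_size - block_len - 1 = 4087, whose 32-byte loads
    // would cross into the next page.
    try std.testing.expect(page_size - block_len - 1 > last_safe_offset);
}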
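The lib/std/unicode.zig call sites all share the same chunked fast-path shape around the renamed function: pick a comptime chunk length, scan full chunks as vectors, and finish with a scalar tail, which also covers targets where `suggestVectorLength` returns null. A standalone sketch of that pattern (this helper and its name are illustrative, not part of the patch):

const std = @import("std");

// Hypothetical helper, for illustration only.
fn allAscii(bytes: []const u8) bool {
    // Comptime-known chunk length; null means vectors are not recommended,
    // so fall back to one-element chunks.
    const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
    const Chunk = @Vector(chunk_len, u8);

    var i: usize = 0;
    while (i + chunk_len <= bytes.len) : (i += chunk_len) {
        const chunk: Chunk = bytes[i..][0..chunk_len].*;
        // Any byte with the high bit set is non-ASCII.
        if (@reduce(.Max, chunk) >= 0x80) return false;
    }
    // Scalar tail for the final partial chunk.
    while (i < bytes.len) : (i += 1) {
        if (bytes[i] >= 0x80) return false;
    }
    return true;
}

test "allAscii" {
    try std.testing.expect(allAscii("plain ascii"));
    try std.testing.expect(!allAscii("caf\xc3\xa9"));
}

Because `orelse 1` degrades the chunk to a single element, the vector loop and the scalar fallback keep one code shape instead of needing a separate branch per target.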