std.simd: return comptime_int from suggestVectorSize

commit c919e9a280
parent f09313dbc4
@@ -82,7 +82,7 @@ pub const HeadersParser = struct {
     /// If the amount returned is less than `bytes.len`, you may assume that the parser is in a content state and the
     /// first byte of content is located at `bytes[result]`.
     pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
-        const vector_len: comptime_int = comptime @max(std.simd.suggestVectorSize(u8) orelse 1, 8);
+        const vector_len: comptime_int = @max(std.simd.suggestVectorSize(u8) orelse 1, 8);
         const len = @as(u32, @intCast(bytes.len));
         var index: u32 = 0;

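Why the `comptime` keyword can be dropped at call sites like the one above: suggestVectorSize now returns ?comptime_int, so its result is compile-time known by construction. A minimal sketch of the pattern (values illustrative):

    const std = @import("std");

    // With `?comptime_int`, the orelse-default and @max fold at compile time
    // without forcing evaluation via the `comptime` keyword.
    const vector_len = @max(std.simd.suggestVectorSize(u8) orelse 1, 8);
    const V = @Vector(vector_len, u8); // @Vector needs a comptime-known length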
@@ -974,7 +974,7 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
         // The below branch assumes that reading past the end of the buffer is valid, as long
         // as we don't read into a new page. This should be the case for most architectures
         // which use paged memory, however should be confirmed before adding a new arch below.
-        .aarch64, .x86, .x86_64 => if (comptime std.simd.suggestVectorSize(T)) |block_len| {
+        .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorSize(T)) |block_len| {
             comptime std.debug.assert(std.mem.page_size % block_len == 0);
             const Block = @Vector(block_len, T);
             const mask: Block = @splat(sentinel);
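The safety argument in the comments hinges on block_len dividing std.mem.page_size: a block_len-aligned load then can never straddle a page boundary. A hedged sketch of that arithmetic (stand-in values, not the std.mem code):

    const std = @import("std");

    test "block-aligned loads stay within a page" {
        const block_len = 16; // stand-in for a suggestVectorSize(T) result
        const addr: usize = 0x12345; // arbitrary address
        const aligned = addr & ~@as(usize, block_len - 1);
        // Since page_size % block_len == 0, the aligned block ends on or
        // before the next page boundary.
        try std.testing.expect(aligned % std.mem.page_size + block_len <= std.mem.page_size);
    }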
@@ -1027,7 +1027,7 @@ test "indexOfSentinel vector paths" {
     const allocator = std.testing.allocator;

     inline for (Types) |T| {
-        const block_len = comptime std.simd.suggestVectorSize(T) orelse continue;
+        const block_len = std.simd.suggestVectorSize(T) orelse continue;

         // Allocate three pages so we guarantee a page-crossing address with a full page after
         const memory = try allocator.alloc(T, 3 * std.mem.page_size / @sizeOf(T));
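The three-page allocation leaves a full page on each side of an internal page boundary, so the test can start a search close enough to the boundary that a vector load must cross it. A sketch of how such a start index could be chosen (hypothetical, not the actual test body):

    // Elements per page for the element type under test.
    const elems_per_page = std.mem.page_size / @sizeOf(T);
    // Start a few elements before the end of the first page so the first
    // vector load straddles into the second page.
    const start = elems_per_page - @max(block_len / 2, 1);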
@@ -1118,11 +1118,11 @@ pub fn indexOfScalarPos(comptime T: type, slice: []const T, start_index: usize,
         !@inComptime() and
         (@typeInfo(T) == .Int or @typeInfo(T) == .Float) and std.math.isPowerOfTwo(@bitSizeOf(T)))
     {
-        if (comptime std.simd.suggestVectorSize(T)) |block_len| {
+        if (std.simd.suggestVectorSize(T)) |block_len| {
             // For Intel Nehalem (2009) and AMD Bulldozer (2012) or later, unaligned loads on aligned data result
             // in the same execution as aligned loads. We ignore older arch's here and don't bother pre-aligning.
             //
-            // Use `comptime std.simd.suggestVectorSize(T)` to get the same alignment as used in this function
+            // Use `std.simd.suggestVectorSize(T)` to get the same alignment as used in this function
             // however this usually isn't necessary unless your arch has a performance penalty due to this.
             //
             // This may differ for other arch's. Arm for example costs a cycle when loading across a cache
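Where pre-aligning does pay off, the comment's advice can be followed directly, and the new ?comptime_int return type makes it ergonomic: the suggested size can feed align() with no `comptime` keyword. A hedged sketch (buffer size chosen arbitrarily):

    const block_len = std.simd.suggestVectorSize(u8) orelse 1;
    // block_len is a comptime_int, so it can parameterize align() directly.
    var buf: [4096]u8 align(block_len) = undefined;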
@@ -6,7 +6,7 @@
 const std = @import("std");
 const builtin = @import("builtin");

-pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?usize {
+pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?comptime_int {
     // This is guesswork, if you have better suggestions can add it or edit the current here
     // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it
     const element_bit_size = @max(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(T)) catch unreachable);
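The element_bit_size line rounds odd bit widths up to a power of two and clamps to at least one byte; a short worked sketch (types chosen for illustration):

    const std = @import("std");

    test "element_bit_size rounding" {
        // @bitSizeOf(u7) == 7, which rounds up to 8; u1 would clamp up to 8 too.
        const ebs = @max(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(u7)) catch unreachable);
        try std.testing.expectEqual(@as(u16, 8), ebs);
    }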
@@ -55,7 +55,7 @@ pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?

 /// Suggests a target-dependant vector size for a given type, or null if scalars are recommended.
 /// Not yet implemented for every CPU architecture.
-pub fn suggestVectorSize(comptime T: type) ?usize {
+pub fn suggestVectorSize(comptime T: type) ?comptime_int {
     return suggestVectorSizeForCpu(T, builtin.cpu);
 }

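Because the wrapper only forwards builtin.cpu, a caller can also ask about a CPU other than the compilation target via suggestVectorSizeForCpu; a hedged sketch (baseline model chosen for illustration):

    const std = @import("std");

    // Ask what vector size would be suggested for baseline x86_64,
    // regardless of the host this code is compiled for.
    const other_cpu = std.Target.Cpu.baseline(.x86_64);
    const len = std.simd.suggestVectorSizeForCpu(f32, other_cpu);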
@@ -200,7 +200,7 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
 pub fn utf8ValidateSlice(input: []const u8) bool {
     var remaining = input;

-    const V_len = comptime std.simd.suggestVectorSize(usize) orelse 1;
+    const V_len = std.simd.suggestVectorSize(usize) orelse 1;
     const V = @Vector(V_len, usize);
     const u8s_in_vector = @sizeOf(usize) * V_len;

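The usize-based vector in utf8ValidateSlice suggests a word-at-a-time ASCII prefilter: a chunk is pure ASCII iff no byte has its high bit set. A hedged sketch of that style of check (not the actual std.unicode code):

    const std = @import("std");

    test "ascii prefilter sketch" {
        const V_len = std.simd.suggestVectorSize(usize) orelse 1;
        const V = @Vector(V_len, usize);
        // 'A' repeated in every byte of every lane: an all-ASCII chunk.
        const chunk: V = @splat(0x4141414141414141 & std.math.maxInt(usize));
        const high_bits: V = @splat(0x8080808080808080 & std.math.maxInt(usize));
        // No high bit set in any lane means every byte is < 0x80.
        try std.testing.expect(@reduce(.Or, chunk & high_bits) == 0);
    }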