From 193604c837df75ab0c3fa5860f8b234263fe5b50 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sat, 29 Jun 2019 11:32:26 -0500 Subject: [PATCH] stage1: add @shuffle() shufflevector support I change the semantics of the mask operand, to make it a little more flexible. There is no real danger in this because it is a compile-error if you do it the LLVM way (and there is an appropriate error to tell you this). v2: avoid problems with double-free --- doc/langref.html.in | 22 +++ src/all_types.hpp | 11 ++ src/codegen.cpp | 32 ++++ src/ir.cpp | 274 +++++++++++++++++++++++++++++++ src/ir_print.cpp | 17 ++ test/compile_errors.zig | 13 ++ test/stage1/behavior/shuffle.zig | 57 +++++++ 7 files changed, 426 insertions(+) create mode 100644 test/stage1/behavior/shuffle.zig diff --git a/doc/langref.html.in b/doc/langref.html.in index 374fbfcde5..7ae0ee7c1c 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8226,6 +8226,28 @@ fn foo(comptime T: type, ptr: *T) T { {#link|pointer|Pointers#}.

{#header_close#} + + {#header_open|@shuffle#} +
{#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, i32)) @Vector(mask.len, ElemType){#endsyntax#}
+

+ Performs the {#syntax#}shufflevector{#endsyntax#} instruction. Each element of the {#syntax#}comptime{#endsyntax#} + (and always {#syntax#}i32{#endsyntax#}) {#syntax#}mask{#endsyntax#} selects an element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}. + Non-negative values select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select + from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#} + operator for indexes into {#syntax#}b{#endsyntax#} so that indexes into both vectors can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#} + value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects is {#syntax#}undefined{#endsyntax#} + then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and + the relevant LLVM documentation on + {#syntax#}shufflevector{#endsyntax#}, although note that the mask values are interpreted differently than in LLVM-IR. + Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} does not have to match. + If exactly one of the vectors is {#syntax#}undefined{#endsyntax#}, it is treated as a vector with the length of the other vector, + and every element selected from it yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, the result is an + {#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with the length of {#syntax#}mask{#endsyntax#}.

+

+ {#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, a {#syntax#}bool{#endsyntax#}, or a + {#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its length determines the result length. +

+ {#header_close#} {#header_close#} {#header_open|Build Mode#} diff --git a/src/all_types.hpp b/src/all_types.hpp index e682eb8de1..deb56cbb40 100644 --- a/src/all_types.hpp +++ b/src/all_types.hpp @@ -1611,6 +1611,7 @@ enum BuiltinFnId { BuiltinFnIdIntToEnum, BuiltinFnIdIntType, BuiltinFnIdVectorType, + BuiltinFnIdShuffle, BuiltinFnIdSetCold, BuiltinFnIdSetRuntimeSafety, BuiltinFnIdSetFloatMode, @@ -2428,6 +2429,7 @@ enum IrInstructionId { IrInstructionIdBoolToInt, IrInstructionIdIntType, IrInstructionIdVectorType, + IrInstructionIdShuffleVector, IrInstructionIdBoolNot, IrInstructionIdMemset, IrInstructionIdMemcpy, @@ -3669,6 +3671,15 @@ struct IrInstructionVectorToArray { IrInstruction *result_loc; }; +struct IrInstructionShuffleVector { + IrInstruction base; + + IrInstruction *scalar_type; + IrInstruction *a; + IrInstruction *b; + IrInstruction *mask; // This is in zig-format, not llvm format +}; + struct IrInstructionAssertZero { IrInstruction base; diff --git a/src/codegen.cpp b/src/codegen.cpp index e4b47be8e5..2f1488635a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4581,6 +4581,35 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int); } +static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) { + uint64_t len_a = instruction->a->value.type->data.vector.len; + uint64_t len_c = instruction->mask->value.type->data.vector.len; + + // LLVM uses integers larger than the length of the first array to + // index into the second array. This was deemed unnecessarily fragile + // when changing code, so Zig uses negative numbers to index the + // second vector. These start at -1 and go down, and are easiest to use + // with the ~ operator. Here we convert between the two formats. 
+ IrInstruction *mask = instruction->mask; + LLVMValueRef *values = allocate(len_c); + for (uint64_t i = 0;i < len_c;i++) { + if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) { + values[i] = LLVMGetUndef(LLVMInt32Type()); + } else { + int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint); + if (v < 0) + v = (uint32_t)~v + (uint32_t)len_a; + values[i] = LLVMConstInt(LLVMInt32Type(), v, false); + } + } + + return LLVMBuildShuffleVector(g->builder, + ir_llvm_value(g, instruction->a), + ir_llvm_value(g, instruction->b), + LLVMConstVector(values, len_c), + ""); +} + static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) { ZigType *int_type = instruction->op->value.type; LLVMValueRef fn_val = get_int_builtin_fn(g, int_type, BuiltinFnIdPopCount); @@ -6095,6 +6124,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable, return ir_render_spill_begin(g, executable, (IrInstructionSpillBegin *)instruction); case IrInstructionIdSpillEnd: return ir_render_spill_end(g, executable, (IrInstructionSpillEnd *)instruction); + case IrInstructionIdShuffleVector: + return ir_render_shuffle_vector(g, executable, (IrInstructionShuffleVector *) instruction); } zig_unreachable(); } @@ -7785,6 +7816,7 @@ static void define_builtin_fns(CodeGen *g) { create_builtin_fn(g, BuiltinFnIdCompileLog, "compileLog", SIZE_MAX); create_builtin_fn(g, BuiltinFnIdIntType, "IntType", 2); // TODO rename to Int create_builtin_fn(g, BuiltinFnIdVectorType, "Vector", 2); + create_builtin_fn(g, BuiltinFnIdShuffle, "shuffle", 4); create_builtin_fn(g, BuiltinFnIdSetCold, "setCold", 1); create_builtin_fn(g, BuiltinFnIdSetRuntimeSafety, "setRuntimeSafety", 1); create_builtin_fn(g, BuiltinFnIdSetFloatMode, "setFloatMode", 1); diff --git a/src/ir.cpp b/src/ir.cpp index 6de08de913..f62a58e37e 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -717,6 +717,10 @@ static 
constexpr IrInstructionId ir_instruction_id(IrInstructionVectorType *) { return IrInstructionIdVectorType; } +static constexpr IrInstructionId ir_instruction_id(IrInstructionShuffleVector *) { + return IrInstructionIdShuffleVector; +} + static constexpr IrInstructionId ir_instruction_id(IrInstructionBoolNot *) { return IrInstructionIdBoolNot; } @@ -2277,6 +2281,25 @@ static IrInstruction *ir_build_vector_type(IrBuilder *irb, Scope *scope, AstNode return &instruction->base; } +static IrInstruction *ir_build_shuffle_vector(IrBuilder *irb, Scope *scope, AstNode *source_node, + IrInstruction *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) +{ + IrInstructionShuffleVector *instruction = ir_build_instruction(irb, scope, source_node); + instruction->scalar_type = scalar_type; + instruction->a = a; + instruction->b = b; + instruction->mask = mask; + + if (scalar_type != nullptr) { + ir_ref_instruction(scalar_type, irb->current_basic_block); + } + ir_ref_instruction(a, irb->current_basic_block); + ir_ref_instruction(b, irb->current_basic_block); + ir_ref_instruction(mask, irb->current_basic_block); + + return &instruction->base; +} + static IrInstruction *ir_build_bool_not(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *value) { IrInstructionBoolNot *instruction = ir_build_instruction(irb, scope, source_node); instruction->value = value; @@ -4936,6 +4959,32 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo IrInstruction *vector_type = ir_build_vector_type(irb, scope, node, arg0_value, arg1_value); return ir_lval_wrap(irb, scope, vector_type, lval, result_loc); } + case BuiltinFnIdShuffle: + { + AstNode *arg0_node = node->data.fn_call_expr.params.at(0); + IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope); + if (arg0_value == irb->codegen->invalid_instruction) + return arg0_value; + + AstNode *arg1_node = node->data.fn_call_expr.params.at(1); + IrInstruction *arg1_value = ir_gen_node(irb, 
arg1_node, scope); + if (arg1_value == irb->codegen->invalid_instruction) + return arg1_value; + + AstNode *arg2_node = node->data.fn_call_expr.params.at(2); + IrInstruction *arg2_value = ir_gen_node(irb, arg2_node, scope); + if (arg2_value == irb->codegen->invalid_instruction) + return arg2_value; + + AstNode *arg3_node = node->data.fn_call_expr.params.at(3); + IrInstruction *arg3_value = ir_gen_node(irb, arg3_node, scope); + if (arg3_value == irb->codegen->invalid_instruction) + return arg3_value; + + IrInstruction *shuffle_vector = ir_build_shuffle_vector(irb, scope, node, + arg0_value, arg1_value, arg2_value, arg3_value); + return ir_lval_wrap(irb, scope, shuffle_vector, lval, result_loc); + } case BuiltinFnIdMemcpy: { AstNode *arg0_node = node->data.fn_call_expr.params.at(0); @@ -22063,6 +22112,228 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr return ir_const_type(ira, &instruction->base, vector_type); } +static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr, + ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) { + assert(source_instr && scalar_type && a && b && mask); + assert(scalar_type->id == ZigTypeIdBool || + scalar_type->id == ZigTypeIdInt || + scalar_type->id == ZigTypeIdFloat || + scalar_type->id == ZigTypeIdPointer); + + ZigType *mask_type = mask->value.type; + if (type_is_invalid(mask_type)) + return ira->codegen->invalid_instruction; + + const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'"; + + if (mask_type->id == ZigTypeIdArray) { + ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type); + mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type); + if (!mask) + return ira->codegen->invalid_instruction; + mask_type = vector_type; + } + + if (mask_type->id != ZigTypeIdVector) { + ir_add_error(ira, mask, + 
buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name))); + return ira->codegen->invalid_instruction; + } + + ZigType *mask_scalar_type = mask_type->data.array.child_type; + if (mask_scalar_type->id != ZigTypeIdInt) { + ir_add_error(ira, mask, + buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name))); + return ira->codegen->invalid_instruction; + } + + if (mask_scalar_type->data.integral.bit_count != 32 || + mask_scalar_type->data.integral.is_signed == false) { + ir_add_error(ira, mask, + buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name))); + return ira->codegen->invalid_instruction; + } + + uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len; + if (a->value.type->id != ZigTypeIdVector) { + if (a->value.type->id != ZigTypeIdUndefined) { + ir_add_error(ira, a, + buf_sprintf("expected vector of element type '%s' got '%s'", + buf_ptr(&scalar_type->name), + buf_ptr(&a->value.type->name))); + return ira->codegen->invalid_instruction; + } + } else { + len_a = a->value.type->data.vector.len; + } + + if (b->value.type->id != ZigTypeIdVector) { + if (b->value.type->id != ZigTypeIdUndefined) { + ir_add_error(ira, b, + buf_sprintf("expected vector of element type '%s' got '%s'", + buf_ptr(&scalar_type->name), + buf_ptr(&b->value.type->name))); + return ira->codegen->invalid_instruction; + } + } else { + len_b = b->value.type->data.vector.len; + } + + if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) { + return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type)); + } + + // undefined is a vector up to length of the other vector. 
+ if (a->value.type->id == ZigTypeIdUndefined) { + a = ir_const_undef(ira, a, b->value.type); + len_a = b->value.type->data.vector.len; + } else if (b->value.type->id == ZigTypeIdUndefined) { + b = ir_const_undef(ira, b, a->value.type); + len_b = a->value.type->data.vector.len; + } + + // FIXME I think this needs to be more sophisticated + if (a->value.type->data.vector.elem_type != scalar_type) { + ir_add_error(ira, a, + buf_sprintf("element type '%s' does not match '%s'", + buf_ptr(&a->value.type->data.vector.elem_type->name), + buf_ptr(&scalar_type->name))); + return ira->codegen->invalid_instruction; + } + if (b->value.type->data.vector.elem_type != scalar_type) { + ir_add_error(ira, b, + buf_sprintf("element type '%s' does not match '%s'", + buf_ptr(&b->value.type->data.vector.elem_type->name), + buf_ptr(&scalar_type->name))); + return ira->codegen->invalid_instruction; + } + + if (a->value.type != b->value.type) { + assert(len_a != len_b); + uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b); + bool expand_b = len_b < len_a; + IrInstruction *expand_mask = ir_const(ira, mask, + get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32)); + expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max); + uint32_t i = 0; + for (; i < len_min; i++) + bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i); + for (; i < len_max; i++) + bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1); + IrInstruction *undef = ir_const_undef(ira, source_instr, + get_vector_type(ira->codegen, len_min, scalar_type)); + if (expand_b) { + if (instr_is_comptime(b)) { + ConstExprValue *old = b->value.data.x_array.data.s_none.elements; + b->value.data.x_array.data.s_none.elements = + allocate(len_a); + memcpy(b->value.data.x_array.data.s_none.elements, old, + b->value.type->data.vector.len * sizeof(ConstExprValue)); + } else { + b = 
ir_build_shuffle_vector(&ira->new_irb, + source_instr->scope, source_instr->source_node, + nullptr, b, undef, expand_mask); + b->value.special = ConstValSpecialRuntime; + } + b->value.type = get_vector_type(ira->codegen, len_max, scalar_type); + } else { + if (instr_is_comptime(a)) { + ConstExprValue *old = a->value.data.x_array.data.s_none.elements; + a->value.data.x_array.data.s_none.elements = + allocate(len_b); + memcpy(a->value.data.x_array.data.s_none.elements, old, + a->value.type->data.vector.len * sizeof(ConstExprValue)); + } else { + a = ir_build_shuffle_vector(&ira->new_irb, + source_instr->scope, source_instr->source_node, + nullptr, a, undef, expand_mask); + a->value.special = ConstValSpecialRuntime; + } + a->value.type = get_vector_type(ira->codegen, len_max, scalar_type); + } + } + ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk); + if (!mask_val) { + ir_add_error(ira, mask, + buf_sprintf("mask must be comptime")); + return ira->codegen->invalid_instruction; + } + for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) { + if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) + continue; + int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint); + if (v >= 0 && (uint64_t)v + 1 > len_a) { + ErrorMsg *msg = ir_add_error(ira, mask, + buf_sprintf("mask index out of bounds")); + add_error_note(ira->codegen, msg, mask->source_node, + buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i)); + if ((uint64_t)v <= len_a + len_b) + add_error_note(ira->codegen, msg, mask->source_node, + buf_sprintf("selections from the second vector are specified with negative numbers")); + } else if (v < 0 && (uint64_t)~v + 1 > len_b) { + ErrorMsg *msg = ir_add_error(ira, mask, + buf_sprintf("mask index out of bounds")); + add_error_note(ira->codegen, msg, mask->source_node, + buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, 
(uintptr_t)i)); + } + else + continue; + return ira->codegen->invalid_instruction; + } + + ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type); + if (instr_is_comptime(a) && + instr_is_comptime(b)) { + IrInstruction *result = ir_const(ira, source_instr, result_type); + result->value.data.x_array.data.s_none.elements = create_const_vals(len_c); + for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) { + if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) + result->value.data.x_array.data.s_none.elements[i].special = + ConstValSpecialUndef; + int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint); + if (v >= 0) + result->value.data.x_array.data.s_none.elements[i] = + a->value.data.x_array.data.s_none.elements[v]; + else if (v < 0) + result->value.data.x_array.data.s_none.elements[i] = + b->value.data.x_array.data.s_none.elements[~v]; + else + zig_unreachable(); + result->value.data.x_array.data.s_none.elements[i].special = + ConstValSpecialStatic; + } + result->value.special = ConstValSpecialStatic; + return result; + } + + // All static analysis passed, and not comptime + IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb, + source_instr->scope, source_instr->source_node, + nullptr, a, b, mask); + result->value.type = result_type; + result->value.special = ConstValSpecialRuntime; + return result; +} + +static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) { + ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type); + assert(scalar_type); + if (type_is_invalid(scalar_type)) + return ira->codegen->invalid_instruction; + + if (scalar_type->id != ZigTypeIdBool && + scalar_type->id != ZigTypeIdInt && + scalar_type->id != ZigTypeIdFloat && + scalar_type->id != ZigTypeIdPointer) { + ir_add_error(ira, instruction->scalar_type, + buf_sprintf("vector element type must be integer, float, 
bool, or pointer; '%s' is invalid", + buf_ptr(&scalar_type->name))); + return ira->codegen->invalid_instruction; + } + + return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child); +} + static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) { IrInstruction *value = instruction->value->child; if (type_is_invalid(value->value.type)) @@ -25607,6 +25878,8 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction return ir_analyze_instruction_int_type(ira, (IrInstructionIntType *)instruction); case IrInstructionIdVectorType: return ir_analyze_instruction_vector_type(ira, (IrInstructionVectorType *)instruction); + case IrInstructionIdShuffleVector: + return ir_analyze_instruction_shuffle_vector(ira, (IrInstructionShuffleVector *)instruction); case IrInstructionIdBoolNot: return ir_analyze_instruction_bool_not(ira, (IrInstructionBoolNot *)instruction); case IrInstructionIdMemset: @@ -25942,6 +26215,7 @@ bool ir_has_side_effects(IrInstruction *instruction) { case IrInstructionIdTruncate: case IrInstructionIdIntType: case IrInstructionIdVectorType: + case IrInstructionIdShuffleVector: case IrInstructionIdBoolNot: case IrInstructionIdSliceSrc: case IrInstructionIdMemberCount: diff --git a/src/ir_print.cpp b/src/ir_print.cpp index f2877b46e6..8561ed4508 100644 --- a/src/ir_print.cpp +++ b/src/ir_print.cpp @@ -42,6 +42,8 @@ static const char* ir_instruction_type_str(IrInstruction* instruction) { switch (instruction->id) { case IrInstructionIdInvalid: return "Invalid"; + case IrInstructionIdShuffleVector: + return "Shuffle"; case IrInstructionIdDeclVarSrc: return "DeclVarSrc"; case IrInstructionIdDeclVarGen: @@ -1208,6 +1210,18 @@ static void ir_print_vector_type(IrPrint *irp, IrInstructionVectorType *instruct fprintf(irp->f, ")"); } +static void ir_print_shuffle_vector(IrPrint *irp, IrInstructionShuffleVector 
*instruction) { + fprintf(irp->f, "@shuffle("); + ir_print_other_instruction(irp, instruction->scalar_type); + fprintf(irp->f, ", "); + ir_print_other_instruction(irp, instruction->a); + fprintf(irp->f, ", "); + ir_print_other_instruction(irp, instruction->b); + fprintf(irp->f, ", "); + ir_print_other_instruction(irp, instruction->mask); + fprintf(irp->f, ")"); +} + static void ir_print_bool_not(IrPrint *irp, IrInstructionBoolNot *instruction) { fprintf(irp->f, "! "); ir_print_other_instruction(irp, instruction->value); @@ -2143,6 +2157,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction, bool case IrInstructionIdVectorType: ir_print_vector_type(irp, (IrInstructionVectorType *)instruction); break; + case IrInstructionIdShuffleVector: + ir_print_shuffle_vector(irp, (IrInstructionShuffleVector *)instruction); + break; case IrInstructionIdBoolNot: ir_print_bool_not(irp, (IrInstructionBoolNot *)instruction); break; diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 9d96d6f948..d9b4ee6a95 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -6484,6 +6484,19 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { "tmp.zig:7:23: error: unable to evaluate constant expression", ); + cases.addTest( + "using LLVM syntax for @shuffle", + \\export fn entry() void { + \\ const v: @Vector(4, u32) = [4]u32{0, 1, 2, 3}; + \\ const x: @Vector(4, u32) = [4]u32{4, 5, 6, 7}; + \\ var z = @shuffle(u32, v, x, [8]i32{0, 1, 2, 3, 4, 5, 6, 7}); + \\} + , + "tmp.zig:4:39: error: mask index out of bounds", + "tmp.zig:4:39: note: when computing vector element at index 4", + "tmp.zig:4:39: note: selections from the second vector are specified with negative numbers", + ); + cases.addTest( "nested vectors", \\export fn entry() void { diff --git a/test/stage1/behavior/shuffle.zig b/test/stage1/behavior/shuffle.zig new file mode 100644 index 0000000000..70bff5991e --- /dev/null +++ b/test/stage1/behavior/shuffle.zig @@ -0,0 +1,57 @@ 
+const std = @import("std"); +const mem = std.mem; +const expect = std.testing.expect; + +test "@shuffle" { + const S = struct { + fn doTheTest() void { + var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 }; + var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 3, 4 }; + const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3)}; + var res = @shuffle(i32, v, x, mask); + expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 })); + + // Implicit cast from array (of mask) + res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3)}); + expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 })); + + // Undefined + const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0}; + res = @shuffle(i32, v, undefined, mask2); + expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647})); + + // Upcasting of b + var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined}; + const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3}; + res = @shuffle(i32, x, v2, mask3); + expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 2147483647, 4 })); + + // Upcasting of a + var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2}; + const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3)}; + res = @shuffle(i32, v3, x, mask4); + expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, -2, 4 })); + + // bool + { + var x2: @Vector(4, bool) = [4]bool{ false, true, false, true}; + var v4: @Vector(2, bool) = [2]bool{ true, false}; + const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2}; + var res2 = @shuffle(bool, x2, v4, mask5); + expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false })); + } + + // FIXME re-enable when LLVM codegen is fixed + // https://bugs.llvm.org/show_bug.cgi?id=42803 + if (false) { + var x2: @Vector(3, bool) = [3]bool{ false, true, false}; + var v4: @Vector(2, bool) = [2]bool{ true, false}; + const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2}; + var res2 = @shuffle(bool, x2, v4, mask5); + expect(mem.eql(bool, 
([4]bool)(res2), [4]bool{ false, false, true, false })); + } + } + }; + S.doTheTest(); + comptime S.doTheTest(); +}