From 0e3ca4c63ecb8e43af8261020d21bc6888d18fc0 Mon Sep 17 00:00:00 2001
From: Shawn Landden
Date: Thu, 25 Jul 2019 11:11:37 -0500
Subject: [PATCH 1/7] Fix array->vector and vector->array for many types. Allow
vector of bool.
Vectors do not have the same packing as arrays, and just bitcasting
is not the correct way to convert them.
---
src/analyze.cpp | 3 ++-
src/codegen.cpp | 28 ++++++++++++++--------
src/ir.cpp | 2 +-
test/compile_errors.zig | 2 +-
test/stage1/behavior/vector.zig | 41 +++++++++++++++++++++++++++++++++
5 files changed, 63 insertions(+), 13 deletions(-)
diff --git a/src/analyze.cpp b/src/analyze.cpp
index d5d8745018..ac70d5646f 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -4708,6 +4708,7 @@ ZigType *get_int_type(CodeGen *g, bool is_signed, uint32_t size_in_bits) {
bool is_valid_vector_elem_type(ZigType *elem_type) {
return elem_type->id == ZigTypeIdInt ||
elem_type->id == ZigTypeIdFloat ||
+ elem_type->id == ZigTypeIdBool ||
get_codegen_ptr_type(elem_type) != nullptr;
}
@@ -4727,7 +4728,7 @@ ZigType *get_vector_type(CodeGen *g, uint32_t len, ZigType *elem_type) {
ZigType *entry = new_type_table_entry(ZigTypeIdVector);
if ((len != 0) && type_has_bits(elem_type)) {
- // Vectors can only be ints, floats, or pointers. ints and floats have trivially resolvable
+ // Vectors can only be ints, floats, bools, or pointers. ints (inc. bools) and floats have trivially resolvable
// llvm type refs. pointers we will use usize instead.
LLVMTypeRef example_vector_llvm_type;
if (elem_type->id == ZigTypeIdPointer) {
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 4799c0a28f..1b86f95433 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -5549,10 +5549,14 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
assert(handle_is_ptr(array_type));
LLVMValueRef result_loc = ir_llvm_value(g, instruction->result_loc);
LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
- LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
- LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
- uint32_t alignment = get_ptr_align(g, instruction->result_loc->value.type);
- gen_store_untyped(g, vector, casted_ptr, alignment, false);
+ LLVMValueRef array = LLVMGetUndef(get_llvm_type(g, array_type));
+ for (uintptr_t i = 0; i < instruction->vector->value.type->data.vector.len; i++) {
+ LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
+ LLVMValueRef elem = LLVMBuildExtractElement(g->builder, vector,
+ index, "vector_to_array");
+ array = LLVMBuildInsertValue(g->builder, array, elem, i, "");
+ }
+ LLVMBuildStore(g->builder, array, result_loc);
return result_loc;
}
@@ -5563,12 +5567,16 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
assert(vector_type->id == ZigTypeIdVector);
assert(!handle_is_ptr(vector_type));
LLVMValueRef array_ptr = ir_llvm_value(g, instruction->array);
- LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
- LLVMPointerType(get_llvm_type(g, vector_type), 0), "");
- ZigType *array_type = instruction->array->value.type;
- assert(array_type->id == ZigTypeIdArray);
- uint32_t alignment = get_abi_alignment(g, array_type->data.array.child_type);
- return gen_load_untyped(g, casted_ptr, alignment, false, "");
+ LLVMValueRef array = LLVMBuildLoad2(g->builder, get_llvm_type(g, instruction->array->value.type),
+ array_ptr, "");
+ LLVMValueRef vector = LLVMGetUndef(get_llvm_type(g, vector_type));
+ for (uintptr_t i = 0; i < instruction->base.value.type->data.vector.len; i++) {
+ LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
+ LLVMValueRef elem = LLVMBuildExtractValue(g->builder, array,
+ i, "vector_to_array");
+ vector = LLVMBuildInsertElement(g->builder, vector, elem, index, "");
+ }
+ return vector;
}
static LLVMValueRef ir_render_assert_zero(CodeGen *g, IrExecutable *executable,
diff --git a/src/ir.cpp b/src/ir.cpp
index ea9039a1b6..56866340c4 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -22024,7 +22024,7 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
if (!is_valid_vector_elem_type(elem_type)) {
ir_add_error(ira, instruction->elem_type,
- buf_sprintf("vector element type must be integer, float, or pointer; '%s' is invalid",
+ buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
buf_ptr(&elem_type->name)));
return ira->codegen->invalid_instruction;
}
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 6365ca64cb..9d96d6f948 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6491,7 +6491,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
\\ var v: V = undefined;
\\}
,
- "tmp.zig:2:26: error: vector element type must be integer, float, or pointer; '@Vector(4, u8)' is invalid",
+ "tmp.zig:2:26: error: vector element type must be integer, float, bool, or pointer; '@Vector(4, u8)' is invalid",
);
cases.add("compileLog of tagged enum doesn't crash the compiler",
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
index 431e3fe272..94d3aa1a45 100644
--- a/test/stage1/behavior/vector.zig
+++ b/test/stage1/behavior/vector.zig
@@ -2,6 +2,18 @@ const std = @import("std");
const mem = std.mem;
const expect = std.testing.expect;
+test "implicit cast vector to array - bool" {
+ const S = struct {
+ fn doTheTest() void {
+ const a: @Vector(4, bool) = [_]bool{ true, false, true, false };
+ const result_array: [4]bool = a;
+ expect(mem.eql(bool, result_array, [4]bool{ true, false, true, false }));
+ }
+ };
+ S.doTheTest();
+ comptime S.doTheTest();
+}
+
test "vector wrap operators" {
const S = struct {
fn doTheTest() void {
@@ -80,3 +92,32 @@ test "array to vector" {
var arr = [4]f32{ foo, 1.5, 0.0, 0.0 };
var vec: @Vector(4, f32) = arr;
}
+
+test "vector casts of sizes not divisable by 8" {
+ const S = struct {
+ fn doTheTest() void {
+ {
+ var v: @Vector(4, u3) = [4]u3{ 5, 2, 3, 0};
+ var x: [4]u3 = v;
+ expect(mem.eql(u3, x, ([4]u3)(v)));
+ }
+ {
+ var v: @Vector(4, u2) = [4]u2{ 1, 2, 3, 0};
+ var x: [4]u2 = v;
+ expect(mem.eql(u2, x, ([4]u2)(v)));
+ }
+ {
+ var v: @Vector(4, u1) = [4]u1{ 1, 0, 1, 0};
+ var x: [4]u1 = v;
+ expect(mem.eql(u1, x, ([4]u1)(v)));
+ }
+ {
+ var v: @Vector(4, bool) = [4]bool{ false, false, true, false};
+ var x: [4]bool = v;
+ expect(mem.eql(bool, x, ([4]bool)(v)));
+ }
+ }
+ };
+ S.doTheTest();
+ comptime S.doTheTest();
+}
From 558b4ac1f0fd7123ebe25f3e59eef275b066c50a Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 18 Sep 2019 10:24:28 -0400
Subject: [PATCH 2/7] adjust codegen of casting between arrays and vectors
* bitcasting is still better when the size_in_bits aligns with the ABI
size of the element type. Logic is reworked to do bitcasting when
possible
* rather than using insertelement/extractelement to work with arrays,
store/load elements directly. This matches codegen for arrays
elsewhere.
---
src/all_types.hpp | 2 +-
src/codegen.cpp | 65 +++++++++++++++++++++++++++++++++++------------
2 files changed, 50 insertions(+), 17 deletions(-)
diff --git a/src/all_types.hpp b/src/all_types.hpp
index 60b292662d..e682eb8de1 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1351,7 +1351,7 @@ struct ZigTypeBoundFn {
};
struct ZigTypeVector {
- // The type must be a pointer, integer, or float
+ // The type must be a pointer, integer, bool, or float
ZigType *elem_type;
uint32_t len;
};
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 1b86f95433..e4b47be8e5 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -5549,14 +5549,29 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
assert(handle_is_ptr(array_type));
LLVMValueRef result_loc = ir_llvm_value(g, instruction->result_loc);
LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
- LLVMValueRef array = LLVMGetUndef(get_llvm_type(g, array_type));
- for (uintptr_t i = 0; i < instruction->vector->value.type->data.vector.len; i++) {
- LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
- LLVMValueRef elem = LLVMBuildExtractElement(g->builder, vector,
- index, "vector_to_array");
- array = LLVMBuildInsertValue(g->builder, array, elem, i, "");
+
+ ZigType *elem_type = array_type->data.array.child_type;
+ bool bitcast_ok = elem_type->size_in_bits == (elem_type->abi_size * 8);
+ if (bitcast_ok) {
+ LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
+ LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
+ uint32_t alignment = get_ptr_align(g, instruction->result_loc->value.type);
+ gen_store_untyped(g, vector, casted_ptr, alignment, false);
+ } else {
+ // size_in_bits differs from the ABI size in bits (abi_size is in bytes), so a simple bitcast
+ // will not work, and we fall back to extractelement, storing one element at a time.
+ LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+ LLVMTypeRef u32_type_ref = LLVMInt32Type();
+ LLVMValueRef zero = LLVMConstInt(usize_type_ref, 0, false);
+ for (uintptr_t i = 0; i < instruction->vector->value.type->data.vector.len; i++) {
+ LLVMValueRef index_usize = LLVMConstInt(usize_type_ref, i, false);
+ LLVMValueRef index_u32 = LLVMConstInt(u32_type_ref, i, false);
+ LLVMValueRef indexes[] = { zero, index_usize };
+ LLVMValueRef elem_ptr = LLVMBuildInBoundsGEP(g->builder, result_loc, indexes, 2, "");
+ LLVMValueRef elem = LLVMBuildExtractElement(g->builder, vector, index_u32, "");
+ LLVMBuildStore(g->builder, elem, elem_ptr);
+ }
+ }
- LLVMBuildStore(g->builder, array, result_loc);
return result_loc;
}
@@ -5567,16 +5582,34 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
assert(vector_type->id == ZigTypeIdVector);
assert(!handle_is_ptr(vector_type));
LLVMValueRef array_ptr = ir_llvm_value(g, instruction->array);
- LLVMValueRef array = LLVMBuildLoad2(g->builder, get_llvm_type(g, instruction->array->value.type),
- array_ptr, "");
- LLVMValueRef vector = LLVMGetUndef(get_llvm_type(g, vector_type));
- for (uintptr_t i = 0; i < instruction->base.value.type->data.vector.len; i++) {
- LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
- LLVMValueRef elem = LLVMBuildExtractValue(g->builder, array,
- i, "vector_to_array");
- vector = LLVMBuildInsertElement(g->builder, vector, elem, index, "");
+ LLVMTypeRef vector_type_ref = get_llvm_type(g, vector_type);
+
+ ZigType *elem_type = vector_type->data.vector.elem_type;
+ bool bitcast_ok = elem_type->size_in_bits == (elem_type->abi_size * 8);
+ if (bitcast_ok) {
+ LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
+ LLVMPointerType(vector_type_ref, 0), "");
+ ZigType *array_type = instruction->array->value.type;
+ assert(array_type->id == ZigTypeIdArray);
+ uint32_t alignment = get_abi_alignment(g, array_type->data.array.child_type);
+ return gen_load_untyped(g, casted_ptr, alignment, false, "");
+ } else {
+ // size_in_bits differs from the ABI size in bits (abi_size is in bytes), so a simple bitcast
+ // will not work, and we fall back to insertelement, loading one element at a time.
+ LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+ LLVMTypeRef u32_type_ref = LLVMInt32Type();
+ LLVMValueRef zero = LLVMConstInt(usize_type_ref, 0, false);
+ LLVMValueRef vector = LLVMGetUndef(vector_type_ref);
+ for (uintptr_t i = 0; i < instruction->base.value.type->data.vector.len; i++) {
+ LLVMValueRef index_usize = LLVMConstInt(usize_type_ref, i, false);
+ LLVMValueRef index_u32 = LLVMConstInt(u32_type_ref, i, false);
+ LLVMValueRef indexes[] = { zero, index_usize };
+ LLVMValueRef elem_ptr = LLVMBuildInBoundsGEP(g->builder, array_ptr, indexes, 2, "");
+ LLVMValueRef elem = LLVMBuildLoad(g->builder, elem_ptr, "");
+ vector = LLVMBuildInsertElement(g->builder, vector, elem, index_u32, "");
+ }
+ return vector;
+ }
- return vector;
}
static LLVMValueRef ir_render_assert_zero(CodeGen *g, IrExecutable *executable,
From 74ce5e9e13014d2657bf00b5893fd4687c7f0359 Mon Sep 17 00:00:00 2001
From: Shawn Landden
Date: Wed, 31 Jul 2019 10:55:53 -0500
Subject: [PATCH 3/7] stage1: proper return type on vector comparisons
---
src/ir.cpp | 119 ++++++++++++++++++++------------
test/stage1/behavior/vector.zig | 17 +++++
2 files changed, 91 insertions(+), 45 deletions(-)
diff --git a/src/ir.cpp b/src/ir.cpp
index 56866340c4..b2a32c96d0 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -13092,6 +13092,59 @@ static bool optional_value_is_null(ConstExprValue *val) {
}
}
+static IrInstruction *ir_evaluate_bin_op_cmp(IrAnalyze *ira, ZigType *resolved_type,
+ ConstExprValue *op1_val, ConstExprValue *op2_val, IrInstructionBinOp *bin_op_instruction, IrBinOp op_id,
+ bool one_possible_value) {
+ if (op1_val->special == ConstValSpecialUndef ||
+ op2_val->special == ConstValSpecialUndef)
+ return ir_const_undef(ira, &bin_op_instruction->base, resolved_type);
+ if (resolved_type->id == ZigTypeIdComptimeFloat || resolved_type->id == ZigTypeIdFloat) {
+ if (float_is_nan(op1_val) || float_is_nan(op2_val)) {
+ return ir_const_bool(ira, &bin_op_instruction->base, op_id == IrBinOpCmpNotEq);
+ }
+ Cmp cmp_result = float_cmp(op1_val, op2_val);
+ bool answer = resolve_cmp_op_id(op_id, cmp_result);
+ return ir_const_bool(ira, &bin_op_instruction->base, answer);
+ } else if (resolved_type->id == ZigTypeIdComptimeInt || resolved_type->id == ZigTypeIdInt) {
+ Cmp cmp_result = bigint_cmp(&op1_val->data.x_bigint, &op2_val->data.x_bigint);
+ bool answer = resolve_cmp_op_id(op_id, cmp_result);
+ return ir_const_bool(ira, &bin_op_instruction->base, answer);
+ } else if (resolved_type->id == ZigTypeIdPointer && op_id != IrBinOpCmpEq && op_id != IrBinOpCmpNotEq) {
+ if ((op1_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
+ op1_val->data.x_ptr.special == ConstPtrSpecialNull) &&
+ (op2_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
+ op2_val->data.x_ptr.special == ConstPtrSpecialNull))
+ {
+ uint64_t op1_addr = op1_val->data.x_ptr.special == ConstPtrSpecialNull ?
+ 0 : op1_val->data.x_ptr.data.hard_coded_addr.addr;
+ uint64_t op2_addr = op2_val->data.x_ptr.special == ConstPtrSpecialNull ?
+ 0 : op2_val->data.x_ptr.data.hard_coded_addr.addr;
+ Cmp cmp_result;
+ if (op1_addr > op2_addr) {
+ cmp_result = CmpGT;
+ } else if (op1_addr < op2_addr) {
+ cmp_result = CmpLT;
+ } else {
+ cmp_result = CmpEQ;
+ }
+ bool answer = resolve_cmp_op_id(op_id, cmp_result);
+ return ir_const_bool(ira, &bin_op_instruction->base, answer);
+ }
+ } else {
+ bool are_equal = one_possible_value || const_values_equal(ira->codegen, op1_val, op2_val);
+ bool answer;
+ if (op_id == IrBinOpCmpEq) {
+ answer = are_equal;
+ } else if (op_id == IrBinOpCmpNotEq) {
+ answer = !are_equal;
+ } else {
+ zig_unreachable();
+ }
+ return ir_const_bool(ira, &bin_op_instruction->base, answer);
+ }
+ zig_unreachable();
+}
+
// Returns ErrorNotLazy when the value cannot be determined
static Error lazy_cmp_zero(AstNode *source_node, ConstExprValue *val, Cmp *result) {
Error err;
@@ -13427,7 +13480,8 @@ static IrInstruction *ir_analyze_bin_op_cmp(IrAnalyze *ira, IrInstructionBinOp *
}
if (one_possible_value || (instr_is_comptime(casted_op1) && instr_is_comptime(casted_op2))) {
- {
+ // TODO do we need lazy values on vector comparisons?
+ if (resolved_type->id != ZigTypeIdVector) {
// Before resolving the values, we special case comparisons against zero. These can often be done
// without resolving lazy values, preventing potential dependency loops.
Cmp op1_cmp_zero;
@@ -13477,51 +13531,22 @@ never_mind_just_calculate_it_normally:
ConstExprValue *op2_val = one_possible_value ? &casted_op2->value : ir_resolve_const(ira, casted_op2, UndefBad);
if (op2_val == nullptr)
return ira->codegen->invalid_instruction;
+ if (resolved_type->id != ZigTypeIdVector)
+ return ir_evaluate_bin_op_cmp(ira, resolved_type, op1_val, op2_val, bin_op_instruction, op_id, one_possible_value);
+ IrInstruction *result = ir_const(ira, &bin_op_instruction->base,
+ get_vector_type(ira->codegen, resolved_type->data.vector.len, ira->codegen->builtin_types.entry_bool));
+ result->value.data.x_array.data.s_none.elements =
+ create_const_vals(resolved_type->data.vector.len);
- if (resolved_type->id == ZigTypeIdComptimeFloat || resolved_type->id == ZigTypeIdFloat) {
- if (float_is_nan(op1_val) || float_is_nan(op2_val)) {
- return ir_const_bool(ira, &bin_op_instruction->base, op_id == IrBinOpCmpNotEq);
- }
- Cmp cmp_result = float_cmp(op1_val, op2_val);
- bool answer = resolve_cmp_op_id(op_id, cmp_result);
- return ir_const_bool(ira, &bin_op_instruction->base, answer);
- } else if (resolved_type->id == ZigTypeIdComptimeInt || resolved_type->id == ZigTypeIdInt) {
- Cmp cmp_result = bigint_cmp(&op1_val->data.x_bigint, &op2_val->data.x_bigint);
- bool answer = resolve_cmp_op_id(op_id, cmp_result);
- return ir_const_bool(ira, &bin_op_instruction->base, answer);
- } else if (resolved_type->id == ZigTypeIdPointer && op_id != IrBinOpCmpEq && op_id != IrBinOpCmpNotEq) {
- if ((op1_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
- op1_val->data.x_ptr.special == ConstPtrSpecialNull) &&
- (op2_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
- op2_val->data.x_ptr.special == ConstPtrSpecialNull))
- {
- uint64_t op1_addr = op1_val->data.x_ptr.special == ConstPtrSpecialNull ?
- 0 : op1_val->data.x_ptr.data.hard_coded_addr.addr;
- uint64_t op2_addr = op2_val->data.x_ptr.special == ConstPtrSpecialNull ?
- 0 : op2_val->data.x_ptr.data.hard_coded_addr.addr;
- Cmp cmp_result;
- if (op1_addr > op2_addr) {
- cmp_result = CmpGT;
- } else if (op1_addr < op2_addr) {
- cmp_result = CmpLT;
- } else {
- cmp_result = CmpEQ;
- }
- bool answer = resolve_cmp_op_id(op_id, cmp_result);
- return ir_const_bool(ira, &bin_op_instruction->base, answer);
- }
- } else {
- bool are_equal = one_possible_value || const_values_equal(ira->codegen, op1_val, op2_val);
- bool answer;
- if (op_id == IrBinOpCmpEq) {
- answer = are_equal;
- } else if (op_id == IrBinOpCmpNotEq) {
- answer = !are_equal;
- } else {
- zig_unreachable();
- }
- return ir_const_bool(ira, &bin_op_instruction->base, answer);
+ expand_undef_array(ira->codegen, &result->value);
+ for (size_t i = 0;i < resolved_type->data.vector.len;i++) {
+ IrInstruction *cur_res = ir_evaluate_bin_op_cmp(ira, resolved_type->data.vector.elem_type,
+ &op1_val->data.x_array.data.s_none.elements[i],
+ &op2_val->data.x_array.data.s_none.elements[i],
+ bin_op_instruction, op_id, one_possible_value);
+ copy_const_val(&result->value.data.x_array.data.s_none.elements[i], &cur_res->value, false);
}
+ return result;
}
// some comparisons with unsigned numbers can be evaluated
@@ -13564,7 +13589,11 @@ never_mind_just_calculate_it_normally:
IrInstruction *result = ir_build_bin_op(&ira->new_irb,
bin_op_instruction->base.scope, bin_op_instruction->base.source_node,
op_id, casted_op1, casted_op2, bin_op_instruction->safety_check_on);
- result->value.type = ira->codegen->builtin_types.entry_bool;
+ if (resolved_type->id == ZigTypeIdVector)
+ result->value.type = get_vector_type(ira->codegen, resolved_type->data.vector.len,
+ ira->codegen->builtin_types.entry_bool);
+ else
+ result->value.type = ira->codegen->builtin_types.entry_bool;
return result;
}
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
index 94d3aa1a45..27277b5e52 100644
--- a/test/stage1/behavior/vector.zig
+++ b/test/stage1/behavior/vector.zig
@@ -30,6 +30,23 @@ test "vector wrap operators" {
comptime S.doTheTest();
}
+test "vector bin compares with mem.eql" {
+ const S = struct {
+ fn doTheTest() void {
+ var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
+ var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 30, 4 };
+ expect(mem.eql(bool, ([4]bool)(v == x), [4]bool{ false, false, true, false}));
+ expect(mem.eql(bool, ([4]bool)(v != x), [4]bool{ true, true, false, true}));
+ expect(mem.eql(bool, ([4]bool)(v < x), [4]bool{ false, true, false, false}));
+ expect(mem.eql(bool, ([4]bool)(v > x), [4]bool{ true, false, false, true}));
+ expect(mem.eql(bool, ([4]bool)(v <= x), [4]bool{ false, true, true, false}));
+ expect(mem.eql(bool, ([4]bool)(v >= x), [4]bool{ true, false, true, true}));
+ }
+ };
+ S.doTheTest();
+ comptime S.doTheTest();
+}
+
test "vector int operators" {
const S = struct {
fn doTheTest() void {
From 9e4065fa738f040dd338c613409fc1089cc33580 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 18 Sep 2019 10:52:32 -0400
Subject: [PATCH 4/7] remove TODO regarding lazy values
The question was:
> // TODO do we need lazy values on vector comparisons?
Nope, in fact the existing code already was returning ErrorNotLazy
for that particular type, and would already goto
never_mind_just_calculate_it_normally. So the explicit check for
ZigTypeIdVector is not needed. I appreciate the caution though.
---
src/ir.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/ir.cpp b/src/ir.cpp
index b2a32c96d0..6de08de913 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -13480,8 +13480,7 @@ static IrInstruction *ir_analyze_bin_op_cmp(IrAnalyze *ira, IrInstructionBinOp *
}
if (one_possible_value || (instr_is_comptime(casted_op1) && instr_is_comptime(casted_op2))) {
- // TODO do we need lazy values on vector comparisons?
- if (resolved_type->id != ZigTypeIdVector) {
+ {
// Before resolving the values, we special case comparisons against zero. These can often be done
// without resolving lazy values, preventing potential dependency loops.
Cmp op1_cmp_zero;
@@ -13589,11 +13588,12 @@ never_mind_just_calculate_it_normally:
IrInstruction *result = ir_build_bin_op(&ira->new_irb,
bin_op_instruction->base.scope, bin_op_instruction->base.source_node,
op_id, casted_op1, casted_op2, bin_op_instruction->safety_check_on);
- if (resolved_type->id == ZigTypeIdVector)
+ if (resolved_type->id == ZigTypeIdVector) {
result->value.type = get_vector_type(ira->codegen, resolved_type->data.vector.len,
ira->codegen->builtin_types.entry_bool);
- else
+ } else {
result->value.type = ira->codegen->builtin_types.entry_bool;
+ }
return result;
}
From 193604c837df75ab0c3fa5860f8b234263fe5b50 Mon Sep 17 00:00:00 2001
From: Shawn Landden
Date: Sat, 29 Jun 2019 11:32:26 -0500
Subject: [PATCH 5/7] stage1: add @shuffle() shufflevector support
I change the semantics of the mask operand, to make it a little more
flexible. There is no real danger in this because it is a compile-error
if you do it the LLVM way (and there is an appropriate error to tell you
this).
v2: avoid problems with double-free
---
doc/langref.html.in | 22 +++
src/all_types.hpp | 11 ++
src/codegen.cpp | 32 ++++
src/ir.cpp | 274 +++++++++++++++++++++++++++++++
src/ir_print.cpp | 17 ++
test/compile_errors.zig | 13 ++
test/stage1/behavior/shuffle.zig | 57 +++++++
7 files changed, 426 insertions(+)
create mode 100644 test/stage1/behavior/shuffle.zig
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 374fbfcde5..7ae0ee7c1c 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -8226,6 +8226,28 @@ fn foo(comptime T: type, ptr: *T) T {
{#link|pointer|Pointers#}.
{#header_close#}
+
+ {#header_open|@shuffle#}
+ {#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, i32)) @Vector(mask.len, ElemType){#endsyntax#}
+
+ Does the {#syntax#}shufflevector{#endsyntax#} instruction. Each element in {#syntax#}comptime{#endsyntax#}
+ (and always {#syntax#}i32{#endsyntax#}) {#syntax#}mask{#endsyntax#} selects an element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}.
+ Non-negative numbers select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select
+ from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#}
+ operator for indexes from b so that both indexes can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#}
+ value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects are {#syntax#}undefined{#endsyntax#}
+ then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and
+ the relevant LLVM Documentation on
+ {#syntax#}shufflevector{#endsyntax#}, although note that the mask values are interpreted differently than in LLVM-IR.
+ Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} do not have to match.
+ The {#syntax#}undefined{#endsyntax#} identifier can be selected from up to the length of the other vector,
+ and yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, yields an
+ {#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with length of {#syntax#}mask{#endsyntax#}.
+
+ {#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, or a
+ {#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its length determines the result length.
+
+ {#header_close#}
{#header_close#}
{#header_open|Build Mode#}
diff --git a/src/all_types.hpp b/src/all_types.hpp
index e682eb8de1..deb56cbb40 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1611,6 +1611,7 @@ enum BuiltinFnId {
BuiltinFnIdIntToEnum,
BuiltinFnIdIntType,
BuiltinFnIdVectorType,
+ BuiltinFnIdShuffle,
BuiltinFnIdSetCold,
BuiltinFnIdSetRuntimeSafety,
BuiltinFnIdSetFloatMode,
@@ -2428,6 +2429,7 @@ enum IrInstructionId {
IrInstructionIdBoolToInt,
IrInstructionIdIntType,
IrInstructionIdVectorType,
+ IrInstructionIdShuffleVector,
IrInstructionIdBoolNot,
IrInstructionIdMemset,
IrInstructionIdMemcpy,
@@ -3669,6 +3671,15 @@ struct IrInstructionVectorToArray {
IrInstruction *result_loc;
};
+struct IrInstructionShuffleVector {
+ IrInstruction base;
+
+ IrInstruction *scalar_type;
+ IrInstruction *a;
+ IrInstruction *b;
+ IrInstruction *mask; // This is in zig-format, not llvm format
+};
+
struct IrInstructionAssertZero {
IrInstruction base;
diff --git a/src/codegen.cpp b/src/codegen.cpp
index e4b47be8e5..2f1488635a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4581,6 +4581,35 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int);
}
+static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) {
+ uint64_t len_a = instruction->a->value.type->data.vector.len;
+ uint64_t len_c = instruction->mask->value.type->data.vector.len;
+
+ // LLVM uses integers larger than the length of the first array to
+ // index into the second array. This was deemed unnecessarily fragile
+ // when changing code, so Zig uses negative numbers to index the
+ // second vector. These start at -1 and go down, and are easiest to use
+ // with the ~ operator. Here we convert between the two formats.
+ IrInstruction *mask = instruction->mask;
+ LLVMValueRef *values = allocate(len_c);
+ for (uint64_t i = 0;i < len_c;i++) {
+ if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) {
+ values[i] = LLVMGetUndef(LLVMInt32Type());
+ } else {
+ int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+ if (v < 0)
+ v = (uint32_t)~v + (uint32_t)len_a;
+ values[i] = LLVMConstInt(LLVMInt32Type(), v, false);
+ }
+ }
+
+ return LLVMBuildShuffleVector(g->builder,
+ ir_llvm_value(g, instruction->a),
+ ir_llvm_value(g, instruction->b),
+ LLVMConstVector(values, len_c),
+ "");
+}
+
static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
ZigType *int_type = instruction->op->value.type;
LLVMValueRef fn_val = get_int_builtin_fn(g, int_type, BuiltinFnIdPopCount);
@@ -6095,6 +6124,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
return ir_render_spill_begin(g, executable, (IrInstructionSpillBegin *)instruction);
case IrInstructionIdSpillEnd:
return ir_render_spill_end(g, executable, (IrInstructionSpillEnd *)instruction);
+ case IrInstructionIdShuffleVector:
+ return ir_render_shuffle_vector(g, executable, (IrInstructionShuffleVector *) instruction);
}
zig_unreachable();
}
@@ -7785,6 +7816,7 @@ static void define_builtin_fns(CodeGen *g) {
create_builtin_fn(g, BuiltinFnIdCompileLog, "compileLog", SIZE_MAX);
create_builtin_fn(g, BuiltinFnIdIntType, "IntType", 2); // TODO rename to Int
create_builtin_fn(g, BuiltinFnIdVectorType, "Vector", 2);
+ create_builtin_fn(g, BuiltinFnIdShuffle, "shuffle", 4);
create_builtin_fn(g, BuiltinFnIdSetCold, "setCold", 1);
create_builtin_fn(g, BuiltinFnIdSetRuntimeSafety, "setRuntimeSafety", 1);
create_builtin_fn(g, BuiltinFnIdSetFloatMode, "setFloatMode", 1);
diff --git a/src/ir.cpp b/src/ir.cpp
index 6de08de913..f62a58e37e 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -717,6 +717,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionVectorType *) {
return IrInstructionIdVectorType;
}
+static constexpr IrInstructionId ir_instruction_id(IrInstructionShuffleVector *) {
+ return IrInstructionIdShuffleVector;
+}
+
static constexpr IrInstructionId ir_instruction_id(IrInstructionBoolNot *) {
return IrInstructionIdBoolNot;
}
@@ -2277,6 +2281,25 @@ static IrInstruction *ir_build_vector_type(IrBuilder *irb, Scope *scope, AstNode
return &instruction->base;
}
+static IrInstruction *ir_build_shuffle_vector(IrBuilder *irb, Scope *scope, AstNode *source_node,
+ IrInstruction *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{
+ IrInstructionShuffleVector *instruction = ir_build_instruction(irb, scope, source_node);
+ instruction->scalar_type = scalar_type;
+ instruction->a = a;
+ instruction->b = b;
+ instruction->mask = mask;
+
+ if (scalar_type != nullptr) {
+ ir_ref_instruction(scalar_type, irb->current_basic_block);
+ }
+ ir_ref_instruction(a, irb->current_basic_block);
+ ir_ref_instruction(b, irb->current_basic_block);
+ ir_ref_instruction(mask, irb->current_basic_block);
+
+ return &instruction->base;
+}
+
static IrInstruction *ir_build_bool_not(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *value) {
IrInstructionBoolNot *instruction = ir_build_instruction(irb, scope, source_node);
instruction->value = value;
@@ -4936,6 +4959,32 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
IrInstruction *vector_type = ir_build_vector_type(irb, scope, node, arg0_value, arg1_value);
return ir_lval_wrap(irb, scope, vector_type, lval, result_loc);
}
+ case BuiltinFnIdShuffle:
+ {
+ AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
+ IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
+ if (arg0_value == irb->codegen->invalid_instruction)
+ return arg0_value;
+
+ AstNode *arg1_node = node->data.fn_call_expr.params.at(1);
+ IrInstruction *arg1_value = ir_gen_node(irb, arg1_node, scope);
+ if (arg1_value == irb->codegen->invalid_instruction)
+ return arg1_value;
+
+ AstNode *arg2_node = node->data.fn_call_expr.params.at(2);
+ IrInstruction *arg2_value = ir_gen_node(irb, arg2_node, scope);
+ if (arg2_value == irb->codegen->invalid_instruction)
+ return arg2_value;
+
+ AstNode *arg3_node = node->data.fn_call_expr.params.at(3);
+ IrInstruction *arg3_value = ir_gen_node(irb, arg3_node, scope);
+ if (arg3_value == irb->codegen->invalid_instruction)
+ return arg3_value;
+
+ IrInstruction *shuffle_vector = ir_build_shuffle_vector(irb, scope, node,
+ arg0_value, arg1_value, arg2_value, arg3_value);
+ return ir_lval_wrap(irb, scope, shuffle_vector, lval, result_loc);
+ }
case BuiltinFnIdMemcpy:
{
AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
@@ -22063,6 +22112,228 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
return ir_const_type(ira, &instruction->base, vector_type);
}
+static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
+ ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) {
+ assert(source_instr && scalar_type && a && b && mask);
+ assert(scalar_type->id == ZigTypeIdBool ||
+ scalar_type->id == ZigTypeIdInt ||
+ scalar_type->id == ZigTypeIdFloat ||
+ scalar_type->id == ZigTypeIdPointer);
+
+ ZigType *mask_type = mask->value.type;
+ if (type_is_invalid(mask_type))
+ return ira->codegen->invalid_instruction;
+
+ const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'";
+
+ if (mask_type->id == ZigTypeIdArray) {
+ ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type);
+ mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type);
+ if (!mask)
+ return ira->codegen->invalid_instruction;
+ mask_type = vector_type;
+ }
+
+ if (mask_type->id != ZigTypeIdVector) {
+ ir_add_error(ira, mask,
+ buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+
+ ZigType *mask_scalar_type = mask_type->data.array.child_type;
+ if (mask_scalar_type->id != ZigTypeIdInt) {
+ ir_add_error(ira, mask,
+ buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+
+ if (mask_scalar_type->data.integral.bit_count != 32 ||
+ mask_scalar_type->data.integral.is_signed == false) {
+ ir_add_error(ira, mask,
+ buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+
+ uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len;
+ if (a->value.type->id != ZigTypeIdVector) {
+ if (a->value.type->id != ZigTypeIdUndefined) {
+ ir_add_error(ira, a,
+ buf_sprintf("expected vector of element type '%s' got '%s'",
+ buf_ptr(&scalar_type->name),
+ buf_ptr(&a->value.type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+ } else {
+ len_a = a->value.type->data.vector.len;
+ }
+
+ if (b->value.type->id != ZigTypeIdVector) {
+ if (b->value.type->id != ZigTypeIdUndefined) {
+ ir_add_error(ira, b,
+ buf_sprintf("expected vector of element type '%s' got '%s'",
+ buf_ptr(&scalar_type->name),
+ buf_ptr(&b->value.type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+ } else {
+ len_b = b->value.type->data.vector.len;
+ }
+
+ if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) {
+ return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type));
+ }
+
+ // undefined is a vector up to length of the other vector.
+ if (a->value.type->id == ZigTypeIdUndefined) {
+ a = ir_const_undef(ira, a, b->value.type);
+ len_a = b->value.type->data.vector.len;
+ } else if (b->value.type->id == ZigTypeIdUndefined) {
+ b = ir_const_undef(ira, b, a->value.type);
+ len_b = a->value.type->data.vector.len;
+ }
+
+ // FIXME I think this needs to be more sophisticated
+ if (a->value.type->data.vector.elem_type != scalar_type) {
+ ir_add_error(ira, a,
+ buf_sprintf("element type '%s' does not match '%s'",
+ buf_ptr(&a->value.type->data.vector.elem_type->name),
+ buf_ptr(&scalar_type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+ if (b->value.type->data.vector.elem_type != scalar_type) {
+ ir_add_error(ira, b,
+ buf_sprintf("element type '%s' does not match '%s'",
+ buf_ptr(&b->value.type->data.vector.elem_type->name),
+ buf_ptr(&scalar_type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+
+ if (a->value.type != b->value.type) {
+ assert(len_a != len_b);
+ uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b);
+ bool expand_b = len_b < len_a;
+ IrInstruction *expand_mask = ir_const(ira, mask,
+ get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
+ expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
+ uint32_t i = 0;
+ for (; i < len_min; i++)
+ bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
+ for (; i < len_max; i++)
+ bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
+ IrInstruction *undef = ir_const_undef(ira, source_instr,
+ get_vector_type(ira->codegen, len_min, scalar_type));
+ if (expand_b) {
+ if (instr_is_comptime(b)) {
+ ConstExprValue *old = b->value.data.x_array.data.s_none.elements;
+ b->value.data.x_array.data.s_none.elements =
+ allocate(len_a);
+ memcpy(b->value.data.x_array.data.s_none.elements, old,
+ b->value.type->data.vector.len * sizeof(ConstExprValue));
+ } else {
+ b = ir_build_shuffle_vector(&ira->new_irb,
+ source_instr->scope, source_instr->source_node,
+ nullptr, b, undef, expand_mask);
+ b->value.special = ConstValSpecialRuntime;
+ }
+ b->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
+ } else {
+ if (instr_is_comptime(a)) {
+ ConstExprValue *old = a->value.data.x_array.data.s_none.elements;
+ a->value.data.x_array.data.s_none.elements =
+ allocate(len_b);
+ memcpy(a->value.data.x_array.data.s_none.elements, old,
+ a->value.type->data.vector.len * sizeof(ConstExprValue));
+ } else {
+ a = ir_build_shuffle_vector(&ira->new_irb,
+ source_instr->scope, source_instr->source_node,
+ nullptr, a, undef, expand_mask);
+ a->value.special = ConstValSpecialRuntime;
+ }
+ a->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
+ }
+ }
+ ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
+ if (!mask_val) {
+ ir_add_error(ira, mask,
+ buf_sprintf("mask must be comptime"));
+ return ira->codegen->invalid_instruction;
+ }
+ for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
+ if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
+ continue;
+ int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+ if (v >= 0 && (uint64_t)v + 1 > len_a) {
+ ErrorMsg *msg = ir_add_error(ira, mask,
+ buf_sprintf("mask index out of bounds"));
+ add_error_note(ira->codegen, msg, mask->source_node,
+ buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
+ if ((uint64_t)v <= len_a + len_b)
+ add_error_note(ira->codegen, msg, mask->source_node,
+ buf_sprintf("selections from the second vector are specified with negative numbers"));
+ } else if (v < 0 && (uint64_t)~v + 1 > len_b) {
+ ErrorMsg *msg = ir_add_error(ira, mask,
+ buf_sprintf("mask index out of bounds"));
+ add_error_note(ira->codegen, msg, mask->source_node,
+ buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
+ }
+ else
+ continue;
+ return ira->codegen->invalid_instruction;
+ }
+
+ ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type);
+ if (instr_is_comptime(a) &&
+ instr_is_comptime(b)) {
+ IrInstruction *result = ir_const(ira, source_instr, result_type);
+ result->value.data.x_array.data.s_none.elements = create_const_vals(len_c);
+ for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
+ if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
+ result->value.data.x_array.data.s_none.elements[i].special =
+ ConstValSpecialUndef;
+ int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+ if (v >= 0)
+ result->value.data.x_array.data.s_none.elements[i] =
+ a->value.data.x_array.data.s_none.elements[v];
+ else if (v < 0)
+ result->value.data.x_array.data.s_none.elements[i] =
+ b->value.data.x_array.data.s_none.elements[~v];
+ else
+ zig_unreachable();
+ result->value.data.x_array.data.s_none.elements[i].special =
+ ConstValSpecialStatic;
+ }
+ result->value.special = ConstValSpecialStatic;
+ return result;
+ }
+
+ // All static analysis passed, and not comptime
+ IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
+ source_instr->scope, source_instr->source_node,
+ nullptr, a, b, mask);
+ result->value.type = result_type;
+ result->value.special = ConstValSpecialRuntime;
+ return result;
+}
+
+static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
+ ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type);
+ assert(scalar_type);
+ if (type_is_invalid(scalar_type))
+ return ira->codegen->invalid_instruction;
+
+ if (scalar_type->id != ZigTypeIdBool &&
+ scalar_type->id != ZigTypeIdInt &&
+ scalar_type->id != ZigTypeIdFloat &&
+ scalar_type->id != ZigTypeIdPointer) {
+ ir_add_error(ira, instruction->scalar_type,
+ buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+ buf_ptr(&scalar_type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+
+ return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child);
+}
+
static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
IrInstruction *value = instruction->value->child;
if (type_is_invalid(value->value.type))
@@ -25607,6 +25878,8 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
return ir_analyze_instruction_int_type(ira, (IrInstructionIntType *)instruction);
case IrInstructionIdVectorType:
return ir_analyze_instruction_vector_type(ira, (IrInstructionVectorType *)instruction);
+ case IrInstructionIdShuffleVector:
+ return ir_analyze_instruction_shuffle_vector(ira, (IrInstructionShuffleVector *)instruction);
case IrInstructionIdBoolNot:
return ir_analyze_instruction_bool_not(ira, (IrInstructionBoolNot *)instruction);
case IrInstructionIdMemset:
@@ -25942,6 +26215,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
case IrInstructionIdTruncate:
case IrInstructionIdIntType:
case IrInstructionIdVectorType:
+ case IrInstructionIdShuffleVector:
case IrInstructionIdBoolNot:
case IrInstructionIdSliceSrc:
case IrInstructionIdMemberCount:
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index f2877b46e6..8561ed4508 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -42,6 +42,8 @@ static const char* ir_instruction_type_str(IrInstruction* instruction) {
switch (instruction->id) {
case IrInstructionIdInvalid:
return "Invalid";
+ case IrInstructionIdShuffleVector:
+ return "Shuffle";
case IrInstructionIdDeclVarSrc:
return "DeclVarSrc";
case IrInstructionIdDeclVarGen:
@@ -1208,6 +1210,18 @@ static void ir_print_vector_type(IrPrint *irp, IrInstructionVectorType *instruct
fprintf(irp->f, ")");
}
+static void ir_print_shuffle_vector(IrPrint *irp, IrInstructionShuffleVector *instruction) {
+ fprintf(irp->f, "@shuffle(");
+ ir_print_other_instruction(irp, instruction->scalar_type);
+ fprintf(irp->f, ", ");
+ ir_print_other_instruction(irp, instruction->a);
+ fprintf(irp->f, ", ");
+ ir_print_other_instruction(irp, instruction->b);
+ fprintf(irp->f, ", ");
+ ir_print_other_instruction(irp, instruction->mask);
+ fprintf(irp->f, ")");
+}
+
static void ir_print_bool_not(IrPrint *irp, IrInstructionBoolNot *instruction) {
fprintf(irp->f, "! ");
ir_print_other_instruction(irp, instruction->value);
@@ -2143,6 +2157,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction, bool
case IrInstructionIdVectorType:
ir_print_vector_type(irp, (IrInstructionVectorType *)instruction);
break;
+ case IrInstructionIdShuffleVector:
+ ir_print_shuffle_vector(irp, (IrInstructionShuffleVector *)instruction);
+ break;
case IrInstructionIdBoolNot:
ir_print_bool_not(irp, (IrInstructionBoolNot *)instruction);
break;
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 9d96d6f948..d9b4ee6a95 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6484,6 +6484,19 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
"tmp.zig:7:23: error: unable to evaluate constant expression",
);
+ cases.addTest(
+ "using LLVM syntax for @shuffle",
+ \\export fn entry() void {
+ \\ const v: @Vector(4, u32) = [4]u32{0, 1, 2, 3};
+ \\ const x: @Vector(4, u32) = [4]u32{4, 5, 6, 7};
+ \\ var z = @shuffle(u32, v, x, [8]i32{0, 1, 2, 3, 4, 5, 6, 7});
+ \\}
+ ,
+ "tmp.zig:4:39: error: mask index out of bounds",
+ "tmp.zig:4:39: note: when computing vector element at index 4",
+ "tmp.zig:4:39: note: selections from the second vector are specified with negative numbers",
+ );
+
cases.addTest(
"nested vectors",
\\export fn entry() void {
diff --git a/test/stage1/behavior/shuffle.zig b/test/stage1/behavior/shuffle.zig
new file mode 100644
index 0000000000..70bff5991e
--- /dev/null
+++ b/test/stage1/behavior/shuffle.zig
@@ -0,0 +1,57 @@
+const std = @import("std");
+const mem = std.mem;
+const expect = std.testing.expect;
+
+test "@shuffle" {
+ const S = struct {
+ fn doTheTest() void {
+ var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
+ var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 3, 4 };
+ const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3)};
+ var res = @shuffle(i32, v, x, mask);
+ expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
+
+ // Implicit cast from array (of mask)
+ res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3)});
+ expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
+
+ // Undefined
+ const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0};
+ res = @shuffle(i32, v, undefined, mask2);
+ expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647}));
+
+ // Upcasting of b
+ var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined};
+ const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3};
+ res = @shuffle(i32, x, v2, mask3);
+ expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 2147483647, 4 }));
+
+ // Upcasting of a
+ var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2};
+ const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3)};
+ res = @shuffle(i32, v3, x, mask4);
+ expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, -2, 4 }));
+
+ // bool
+ {
+ var x2: @Vector(4, bool) = [4]bool{ false, true, false, true};
+ var v4: @Vector(2, bool) = [2]bool{ true, false};
+ const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+ var res2 = @shuffle(bool, x2, v4, mask5);
+ expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
+ }
+
+ // FIXME re-enable when LLVM codegen is fixed
+ // https://bugs.llvm.org/show_bug.cgi?id=42803
+ if (false) {
+ var x2: @Vector(3, bool) = [3]bool{ false, true, false};
+ var v4: @Vector(2, bool) = [2]bool{ true, false};
+ const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+ var res2 = @shuffle(bool, x2, v4, mask5);
+ expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
+ }
+ }
+ };
+ S.doTheTest();
+ comptime S.doTheTest();
+}
From 2038f4d45a597cc672380c0a5fc8dd98e928d24c Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 18 Sep 2019 15:41:56 -0400
Subject: [PATCH 6/7] rework the implementation
* update documentation
- move `@shuffle` to be sorted alphabetically
- remove mention of LLVM
- minor clarifications & rewording
* introduce ir_resolve_vector_elem_type to avoid duplicate compile
error message and duplicate vector element checking logic
* rework ir_analyze_shuffle_vector to solve various issues
* improve `@shuffle` to allow implicit cast of arrays
* the shuffle tests weren't being run
---
doc/langref.html.in | 59 +++--
src/codegen.cpp | 19 +-
src/ir.cpp | 375 +++++++++++++++----------------
test/compile_errors.zig | 14 +-
test/stage1/behavior.zig | 1 +
test/stage1/behavior/shuffle.zig | 32 +--
6 files changed, 250 insertions(+), 250 deletions(-)
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 7ae0ee7c1c..8a303640e6 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -7673,6 +7673,43 @@ test "@setRuntimeSafety" {
{#see_also|@shlExact|@shlWithOverflow#}
{#header_close#}
+ {#header_open|@shuffle#}
+ {#syntax#}@shuffle(comptime E: type, a: @Vector(a_len, E), b: @Vector(b_len, E), comptime mask: @Vector(mask_len, i32)) @Vector(mask_len, E){#endsyntax#}
+
+ Constructs a new {#link|vector|Vectors#} by selecting elements from {#syntax#}a{#endsyntax#} and
+ {#syntax#}b{#endsyntax#} based on {#syntax#}mask{#endsyntax#}.
+
+
+ Each element in {#syntax#}mask{#endsyntax#} selects an element from either {#syntax#}a{#endsyntax#} or
+ {#syntax#}b{#endsyntax#}. Non-negative values select from {#syntax#}a{#endsyntax#}, starting at 0.
+ Negative values select from {#syntax#}b{#endsyntax#}, starting at {#syntax#}-1{#endsyntax#} and going down.
+ It is recommended to use the {#syntax#}~{#endsyntax#} operator for indexes from {#syntax#}b{#endsyntax#}
+ so that both indexes can start from {#syntax#}0{#endsyntax#} (i.e. {#syntax#}~i32(0){#endsyntax#} is
+ {#syntax#}-1{#endsyntax#}).
+
+
+ For each element of {#syntax#}mask{#endsyntax#}, if it or the selected value from
+ {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#},
+ then the resulting element is {#syntax#}undefined{#endsyntax#}.
+
+
+ {#syntax#}a_len{#endsyntax#} and {#syntax#}b_len{#endsyntax#} may differ. Out-of-bounds element
+ indexes in {#syntax#}mask{#endsyntax#} result in compile errors.
+
+
+ If {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#}, it
+ is equivalent to a vector of all {#syntax#}undefined{#endsyntax#} with the same length as the other vector.
+ If both vectors are {#syntax#}undefined{#endsyntax#}, {#syntax#}@shuffle{#endsyntax#} returns
+ a vector with all elements {#syntax#}undefined{#endsyntax#}.
+
+
+ {#syntax#}E{#endsyntax#} must be an {#link|integer|Integers#}, {#link|float|Floats#},
+ {#link|pointer|Pointers#}, or {#syntax#}bool{#endsyntax#}. The mask may be any vector length, and its
+ length determines the result length.
+
+ {#see_also|SIMD#}
+ {#header_close#}
+
{#header_open|@sizeOf#}
{#syntax#}@sizeOf(comptime T: type) comptime_int{#endsyntax#}
@@ -8226,28 +8263,6 @@ fn foo(comptime T: type, ptr: *T) T {
{#link|pointer|Pointers#}.
{#header_close#}
-
- {#header_open|@shuffle#}
- {#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, u32)) @Vector(mask.len, ElemType){#endsyntax#}
-
- Does the {#syntax#}shufflevector{#endsyntax#} instruction. Each element in {#syntax#}comptime{#endsyntax#}
- (and always {#syntax#}i32{#endsyntax#}) {#syntax#}mask{#endsyntax#} selects a element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}.
- Positive numbers select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select
- from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#}
- operator from indexes from b so that both indexes can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#}
- value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects are {#syntax#}undefined{#endsyntax#}
- then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and
- the relevent LLVM Documentation on
- {#syntax#}shufflevector{#endsyntax#}, although note that the mask values are interpreted differently than in LLVM-IR.
- Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} do not have to match.
- The {#syntax#}undefined{#endsyntax#} identifier can be selected from up to the length of the other vector,
- and yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, yields an
- {#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with length of {#syntax#}mask{#endsyntax#}.
-
- {#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, or a
- {#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its' length determines the result length.
-
- {#header_close#}
{#header_close#}
{#header_open|Build Mode#}
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 2f1488635a..7676b3bbd0 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4583,7 +4583,7 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) {
uint64_t len_a = instruction->a->value.type->data.vector.len;
- uint64_t len_c = instruction->mask->value.type->data.vector.len;
+ uint64_t len_mask = instruction->mask->value.type->data.vector.len;
// LLVM uses integers larger than the length of the first array to
// index into the second array. This was deemed unnecessarily fragile
@@ -4591,23 +4591,24 @@ static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executabl
// second vector. These start at -1 and go down, and are easiest to use
// with the ~ operator. Here we convert between the two formats.
IrInstruction *mask = instruction->mask;
- LLVMValueRef *values = allocate(len_c);
- for (uint64_t i = 0;i < len_c;i++) {
+ LLVMValueRef *values = allocate(len_mask);
+ for (uint64_t i = 0; i < len_mask; i++) {
if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) {
values[i] = LLVMGetUndef(LLVMInt32Type());
} else {
- int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
- if (v < 0)
- v = (uint32_t)~v + (uint32_t)len_a;
- values[i] = LLVMConstInt(LLVMInt32Type(), v, false);
+ int32_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+ uint32_t index_val = (v >= 0) ? (uint32_t)v : (uint32_t)~v + (uint32_t)len_a;
+ values[i] = LLVMConstInt(LLVMInt32Type(), index_val, false);
}
}
+ LLVMValueRef llvm_mask_value = LLVMConstVector(values, len_mask);
+ free(values);
+
return LLVMBuildShuffleVector(g->builder,
ir_llvm_value(g, instruction->a),
ir_llvm_value(g, instruction->b),
- LLVMConstVector(values, len_c),
- "");
+ llvm_mask_value, "");
}
static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
diff --git a/src/ir.cpp b/src/ir.cpp
index f62a58e37e..cbc00f0cfe 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -11049,6 +11049,19 @@ static ZigType *ir_resolve_type(IrAnalyze *ira, IrInstruction *type_value) {
return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val);
}
+static ZigType *ir_resolve_vector_elem_type(IrAnalyze *ira, IrInstruction *elem_type_value) {
+ ZigType *elem_type = ir_resolve_type(ira, elem_type_value);
+ if (type_is_invalid(elem_type))
+ return ira->codegen->builtin_types.entry_invalid;
+ if (!is_valid_vector_elem_type(elem_type)) {
+ ir_add_error(ira, elem_type_value,
+ buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+ buf_ptr(&elem_type->name)));
+ return ira->codegen->builtin_types.entry_invalid;
+ }
+ return elem_type;
+}
+
static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
ZigType *ty = ir_resolve_type(ira, type_value);
if (type_is_invalid(ty))
@@ -22096,242 +22109,212 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
if (!ir_resolve_unsigned(ira, instruction->len->child, ira->codegen->builtin_types.entry_u32, &len))
return ira->codegen->invalid_instruction;
- ZigType *elem_type = ir_resolve_type(ira, instruction->elem_type->child);
+ ZigType *elem_type = ir_resolve_vector_elem_type(ira, instruction->elem_type->child);
if (type_is_invalid(elem_type))
return ira->codegen->invalid_instruction;
- if (!is_valid_vector_elem_type(elem_type)) {
- ir_add_error(ira, instruction->elem_type,
- buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
- buf_ptr(&elem_type->name)));
- return ira->codegen->invalid_instruction;
- }
-
ZigType *vector_type = get_vector_type(ira->codegen, len, elem_type);
return ir_const_type(ira, &instruction->base, vector_type);
}
static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
- ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) {
- assert(source_instr && scalar_type && a && b && mask);
- assert(scalar_type->id == ZigTypeIdBool ||
- scalar_type->id == ZigTypeIdInt ||
- scalar_type->id == ZigTypeIdFloat ||
- scalar_type->id == ZigTypeIdPointer);
+ ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{
+ ir_assert(source_instr && scalar_type && a && b && mask, source_instr);
+ ir_assert(is_valid_vector_elem_type(scalar_type), source_instr);
- ZigType *mask_type = mask->value.type;
- if (type_is_invalid(mask_type))
- return ira->codegen->invalid_instruction;
-
- const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'";
-
- if (mask_type->id == ZigTypeIdArray) {
- ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type);
- mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type);
- if (!mask)
- return ira->codegen->invalid_instruction;
- mask_type = vector_type;
- }
-
- if (mask_type->id != ZigTypeIdVector) {
- ir_add_error(ira, mask,
- buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
- return ira->codegen->invalid_instruction;
- }
-
- ZigType *mask_scalar_type = mask_type->data.array.child_type;
- if (mask_scalar_type->id != ZigTypeIdInt) {
- ir_add_error(ira, mask,
- buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
- return ira->codegen->invalid_instruction;
- }
-
- if (mask_scalar_type->data.integral.bit_count != 32 ||
- mask_scalar_type->data.integral.is_signed == false) {
- ir_add_error(ira, mask,
- buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
- return ira->codegen->invalid_instruction;
- }
-
- uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len;
- if (a->value.type->id != ZigTypeIdVector) {
- if (a->value.type->id != ZigTypeIdUndefined) {
- ir_add_error(ira, a,
- buf_sprintf("expected vector of element type '%s' got '%s'",
- buf_ptr(&scalar_type->name),
- buf_ptr(&a->value.type->name)));
- return ira->codegen->invalid_instruction;
- }
+ uint32_t len_mask;
+ if (mask->value.type->id == ZigTypeIdVector) {
+ len_mask = mask->value.type->data.vector.len;
+ } else if (mask->value.type->id == ZigTypeIdArray) {
+ len_mask = mask->value.type->data.array.len;
} else {
+ ir_add_error(ira, mask,
+ buf_sprintf("expected vector or array, found '%s'",
+ buf_ptr(&mask->value.type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+ mask = ir_implicit_cast(ira, mask, get_vector_type(ira->codegen, len_mask,
+ ira->codegen->builtin_types.entry_i32));
+ if (type_is_invalid(mask->value.type))
+ return ira->codegen->invalid_instruction;
+
+ uint32_t len_a;
+ if (a->value.type->id == ZigTypeIdVector) {
len_a = a->value.type->data.vector.len;
+ } else if (a->value.type->id == ZigTypeIdArray) {
+ len_a = a->value.type->data.array.len;
+ } else if (a->value.type->id == ZigTypeIdUndefined) {
+ len_a = UINT32_MAX;
+ } else {
+ ir_add_error(ira, a,
+ buf_sprintf("expected vector or array with element type '%s', found '%s'",
+ buf_ptr(&scalar_type->name),
+ buf_ptr(&a->value.type->name)));
+ return ira->codegen->invalid_instruction;
}
- if (b->value.type->id != ZigTypeIdVector) {
- if (b->value.type->id != ZigTypeIdUndefined) {
- ir_add_error(ira, b,
- buf_sprintf("expected vector of element type '%s' got '%s'",
- buf_ptr(&scalar_type->name),
- buf_ptr(&b->value.type->name)));
+ uint32_t len_b;
+ if (b->value.type->id == ZigTypeIdVector) {
+ len_b = b->value.type->data.vector.len;
+ } else if (b->value.type->id == ZigTypeIdArray) {
+ len_b = b->value.type->data.array.len;
+ } else if (b->value.type->id == ZigTypeIdUndefined) {
+ len_b = UINT32_MAX;
+ } else {
+ ir_add_error(ira, b,
+ buf_sprintf("expected vector or array with element type '%s', found '%s'",
+ buf_ptr(&scalar_type->name),
+ buf_ptr(&b->value.type->name)));
+ return ira->codegen->invalid_instruction;
+ }
+
+ if (len_a == UINT32_MAX && len_b == UINT32_MAX) {
+ return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_mask, scalar_type));
+ }
+
+ if (len_a == UINT32_MAX) {
+ len_a = len_b;
+ a = ir_const_undef(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+ } else {
+ a = ir_implicit_cast(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+ if (type_is_invalid(a->value.type))
+ return ira->codegen->invalid_instruction;
+ }
+
+ if (len_b == UINT32_MAX) {
+ len_b = len_a;
+ b = ir_const_undef(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+ } else {
+ b = ir_implicit_cast(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+ if (type_is_invalid(b->value.type))
+ return ira->codegen->invalid_instruction;
+ }
+
+ ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
+ if (mask_val == nullptr)
+ return ira->codegen->invalid_instruction;
+
+ expand_undef_array(ira->codegen, mask_val);
+
+ for (uint32_t i = 0; i < len_mask; i += 1) {
+ ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+ if (mask_elem_val->special == ConstValSpecialUndef)
+ continue;
+ int32_t v_i32 = bigint_as_signed(&mask_elem_val->data.x_bigint);
+ uint32_t v;
+ IrInstruction *chosen_operand;
+ if (v_i32 >= 0) {
+ v = (uint32_t)v_i32;
+ chosen_operand = a;
+ } else {
+ v = (uint32_t)~v_i32;
+ chosen_operand = b;
+ }
+ if (v >= chosen_operand->value.type->data.vector.len) {
+ ErrorMsg *msg = ir_add_error(ira, mask,
+ buf_sprintf("mask index '%u' has out-of-bounds selection", i));
+ add_error_note(ira->codegen, msg, chosen_operand->source_node,
+ buf_sprintf("selected index '%u' out of bounds of %s", v,
+ buf_ptr(&chosen_operand->value.type->name)));
+ if (chosen_operand == a && v < len_a + len_b) {
+ add_error_note(ira->codegen, msg, b->source_node,
+ buf_create_from_str("selections from the second vector are specified with negative numbers"));
+ }
return ira->codegen->invalid_instruction;
}
- } else {
- len_b = b->value.type->data.vector.len;
}
- if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) {
- return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type));
- }
+ ZigType *result_type = get_vector_type(ira->codegen, len_mask, scalar_type);
+ if (instr_is_comptime(a) && instr_is_comptime(b)) {
+ ConstExprValue *a_val = ir_resolve_const(ira, a, UndefOk);
+ if (a_val == nullptr)
+ return ira->codegen->invalid_instruction;
- // undefined is a vector up to length of the other vector.
- if (a->value.type->id == ZigTypeIdUndefined) {
- a = ir_const_undef(ira, a, b->value.type);
- len_a = b->value.type->data.vector.len;
- } else if (b->value.type->id == ZigTypeIdUndefined) {
- b = ir_const_undef(ira, b, a->value.type);
- len_b = a->value.type->data.vector.len;
- }
+ ConstExprValue *b_val = ir_resolve_const(ira, b, UndefOk);
+ if (b_val == nullptr)
+ return ira->codegen->invalid_instruction;
- // FIXME I think this needs to be more sophisticated
- if (a->value.type->data.vector.elem_type != scalar_type) {
- ir_add_error(ira, a,
- buf_sprintf("element type '%s' does not match '%s'",
- buf_ptr(&a->value.type->data.vector.elem_type->name),
- buf_ptr(&scalar_type->name)));
- return ira->codegen->invalid_instruction;
- }
- if (b->value.type->data.vector.elem_type != scalar_type) {
- ir_add_error(ira, b,
- buf_sprintf("element type '%s' does not match '%s'",
- buf_ptr(&b->value.type->data.vector.elem_type->name),
- buf_ptr(&scalar_type->name)));
- return ira->codegen->invalid_instruction;
- }
+ expand_undef_array(ira->codegen, a_val);
+ expand_undef_array(ira->codegen, b_val);
- if (a->value.type != b->value.type) {
- assert(len_a != len_b);
- uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b);
- bool expand_b = len_b < len_a;
- IrInstruction *expand_mask = ir_const(ira, mask,
- get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
- expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
- uint32_t i = 0;
- for (; i < len_min; i++)
- bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
- for (; i < len_max; i++)
- bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
- IrInstruction *undef = ir_const_undef(ira, source_instr,
- get_vector_type(ira->codegen, len_min, scalar_type));
- if (expand_b) {
- if (instr_is_comptime(b)) {
- ConstExprValue *old = b->value.data.x_array.data.s_none.elements;
- b->value.data.x_array.data.s_none.elements =
- allocate(len_a);
- memcpy(b->value.data.x_array.data.s_none.elements, old,
- b->value.type->data.vector.len * sizeof(ConstExprValue));
- } else {
- b = ir_build_shuffle_vector(&ira->new_irb,
- source_instr->scope, source_instr->source_node,
- nullptr, b, undef, expand_mask);
- b->value.special = ConstValSpecialRuntime;
- }
- b->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
- } else {
- if (instr_is_comptime(a)) {
- ConstExprValue *old = a->value.data.x_array.data.s_none.elements;
- a->value.data.x_array.data.s_none.elements =
- allocate(len_b);
- memcpy(a->value.data.x_array.data.s_none.elements, old,
- a->value.type->data.vector.len * sizeof(ConstExprValue));
- } else {
- a = ir_build_shuffle_vector(&ira->new_irb,
- source_instr->scope, source_instr->source_node,
- nullptr, a, undef, expand_mask);
- a->value.special = ConstValSpecialRuntime;
- }
- a->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
- }
- }
- ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
- if (!mask_val) {
- ir_add_error(ira, mask,
- buf_sprintf("mask must be comptime"));
- return ira->codegen->invalid_instruction;
- }
- for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
- if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
- continue;
- int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
- if (v >= 0 && (uint64_t)v + 1 > len_a) {
- ErrorMsg *msg = ir_add_error(ira, mask,
- buf_sprintf("mask index out of bounds"));
- add_error_note(ira->codegen, msg, mask->source_node,
- buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
- if ((uint64_t)v <= len_a + len_b)
- add_error_note(ira->codegen, msg, mask->source_node,
- buf_sprintf("selections from the second vector are specified with negative numbers"));
- } else if (v < 0 && (uint64_t)~v + 1 > len_b) {
- ErrorMsg *msg = ir_add_error(ira, mask,
- buf_sprintf("mask index out of bounds"));
- add_error_note(ira->codegen, msg, mask->source_node,
- buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
- }
- else
- continue;
- return ira->codegen->invalid_instruction;
- }
-
- ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type);
- if (instr_is_comptime(a) &&
- instr_is_comptime(b)) {
IrInstruction *result = ir_const(ira, source_instr, result_type);
- result->value.data.x_array.data.s_none.elements = create_const_vals(len_c);
- for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
- if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
- result->value.data.x_array.data.s_none.elements[i].special =
- ConstValSpecialUndef;
- int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
- if (v >= 0)
- result->value.data.x_array.data.s_none.elements[i] =
- a->value.data.x_array.data.s_none.elements[v];
- else if (v < 0)
- result->value.data.x_array.data.s_none.elements[i] =
- b->value.data.x_array.data.s_none.elements[~v];
- else
- zig_unreachable();
- result->value.data.x_array.data.s_none.elements[i].special =
- ConstValSpecialStatic;
+ result->value.data.x_array.data.s_none.elements = create_const_vals(len_mask);
+ for (uint32_t i = 0; i < mask_val->type->data.vector.len; i += 1) {
+ ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+ ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+ if (mask_elem_val->special == ConstValSpecialUndef) {
+ result_elem_val->special = ConstValSpecialUndef;
+ continue;
+ }
+ int32_t v = bigint_as_signed(&mask_elem_val->data.x_bigint);
+ // We've already checked for and emitted compile errors for index out of bounds here.
+ ConstExprValue *src_elem_val = (v >= 0) ?
+ &a->value.data.x_array.data.s_none.elements[v] :
+ &b->value.data.x_array.data.s_none.elements[~v];
+ copy_const_val(result_elem_val, src_elem_val, false);
+
+ ir_assert(result_elem_val->special == ConstValSpecialStatic, source_instr);
}
result->value.special = ConstValSpecialStatic;
return result;
}
- // All static analysis passed, and not comptime
+ // All static analysis passed, and not comptime.
+ // For runtime codegen, vectors a and b must be the same length. Here we
+ // recursively @shuffle the smaller vector with an undefined vector of its
+ // own length to pad it with undefined elements up to the length of the
+ // longer vector. The recursion is only 1 level deep because both operands
+ // of the nested ir_analyze_shuffle_vector call have length len_min.
+ if (len_a != len_b) {
+ uint32_t len_min = min(len_a, len_b);
+ uint32_t len_max = max(len_a, len_b);
+
+ IrInstruction *expand_mask = ir_const(ira, mask,
+ get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
+ expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
+ uint32_t i = 0;
+ for (; i < len_min; i += 1)
+ bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
+ for (; i < len_max; i += 1)
+ bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
+
+ IrInstruction *undef = ir_const_undef(ira, source_instr,
+ get_vector_type(ira->codegen, len_min, scalar_type));
+
+ if (len_b < len_a) {
+ b = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, b, undef, expand_mask);
+ } else {
+ a = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, a, undef, expand_mask);
+ }
+ }
+
IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
source_instr->scope, source_instr->source_node,
nullptr, a, b, mask);
result->value.type = result_type;
- result->value.special = ConstValSpecialRuntime;
return result;
}
static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
- ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type);
- assert(scalar_type);
+ ZigType *scalar_type = ir_resolve_vector_elem_type(ira, instruction->scalar_type);
if (type_is_invalid(scalar_type))
return ira->codegen->invalid_instruction;
- if (scalar_type->id != ZigTypeIdBool &&
- scalar_type->id != ZigTypeIdInt &&
- scalar_type->id != ZigTypeIdFloat &&
- scalar_type->id != ZigTypeIdPointer) {
- ir_add_error(ira, instruction->scalar_type,
- buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
- buf_ptr(&scalar_type->name)));
+ IrInstruction *a = instruction->a->child;
+ if (type_is_invalid(a->value.type))
return ira->codegen->invalid_instruction;
- }
- return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child);
+ IrInstruction *b = instruction->b->child;
+ if (type_is_invalid(b->value.type))
+ return ira->codegen->invalid_instruction;
+
+ IrInstruction *mask = instruction->mask->child;
+ if (type_is_invalid(mask->value.type))
+ return ira->codegen->invalid_instruction;
+
+ return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
}
static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index d9b4ee6a95..1fe3fc58ab 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6485,16 +6485,16 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
);
cases.addTest(
- "using LLVM syntax for @shuffle",
+ "@shuffle with selected index past first vector length",
\\export fn entry() void {
- \\ const v: @Vector(4, u32) = [4]u32{0, 1, 2, 3};
- \\ const x: @Vector(4, u32) = [4]u32{4, 5, 6, 7};
- \\ var z = @shuffle(u32, v, x, [8]i32{0, 1, 2, 3, 4, 5, 6, 7});
+ \\ const v: @Vector(4, u32) = [4]u32{ 10, 11, 12, 13 };
+ \\ const x: @Vector(4, u32) = [4]u32{ 14, 15, 16, 17 };
+ \\ var z = @shuffle(u32, v, x, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
\\}
,
- "tmp.zig:4:39: error: mask index out of bounds",
- "tmp.zig:4:39: note: when computing vector element at index 4",
- "tmp.zig:4:39: note: selections from the second vector are specified with negative numbers",
+ "tmp.zig:4:39: error: mask index '4' has out-of-bounds selection",
+ "tmp.zig:4:27: note: selected index '7' out of bounds of @Vector(4, u32)",
+ "tmp.zig:4:30: note: selections from the second vector are specified with negative numbers",
);
cases.addTest(
diff --git a/test/stage1/behavior.zig b/test/stage1/behavior.zig
index db6cdad3b1..e56fc7ba7f 100644
--- a/test/stage1/behavior.zig
+++ b/test/stage1/behavior.zig
@@ -80,6 +80,7 @@ comptime {
_ = @import("behavior/pub_enum.zig");
_ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");
_ = @import("behavior/reflection.zig");
+ _ = @import("behavior/shuffle.zig");
_ = @import("behavior/sizeof_and_typeof.zig");
_ = @import("behavior/slice.zig");
_ = @import("behavior/slicetobytes.zig");
diff --git a/test/stage1/behavior/shuffle.zig b/test/stage1/behavior/shuffle.zig
index 70bff5991e..2029ec582f 100644
--- a/test/stage1/behavior/shuffle.zig
+++ b/test/stage1/behavior/shuffle.zig
@@ -7,46 +7,46 @@ test "@shuffle" {
fn doTheTest() void {
var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 3, 4 };
- const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3)};
+ const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3) };
var res = @shuffle(i32, v, x, mask);
expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
// Implicit cast from array (of mask)
- res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3)});
+ res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3) });
expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
// Undefined
- const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0};
+ const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0 };
res = @shuffle(i32, v, undefined, mask2);
- expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647}));
+ expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647 }));
// Upcasting of b
- var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined};
- const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3};
+ var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined };
+ const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3 };
res = @shuffle(i32, x, v2, mask3);
expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 2147483647, 4 }));
// Upcasting of a
- var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2};
- const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3)};
+ var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2 };
+ const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3) };
res = @shuffle(i32, v3, x, mask4);
expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, -2, 4 }));
// bool
{
- var x2: @Vector(4, bool) = [4]bool{ false, true, false, true};
- var v4: @Vector(2, bool) = [2]bool{ true, false};
- const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+ var x2: @Vector(4, bool) = [4]bool{ false, true, false, true };
+ var v4: @Vector(2, bool) = [2]bool{ true, false };
+ const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
var res2 = @shuffle(bool, x2, v4, mask5);
expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
}
- // FIXME re-enable when LLVM codegen is fixed
- // https://bugs.llvm.org/show_bug.cgi?id=42803
+ // TODO re-enable when LLVM codegen is fixed
+ // https://github.com/ziglang/zig/issues/3246
if (false) {
- var x2: @Vector(3, bool) = [3]bool{ false, true, false};
- var v4: @Vector(2, bool) = [2]bool{ true, false};
- const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+ var x2: @Vector(3, bool) = [3]bool{ false, true, false };
+ var v4: @Vector(2, bool) = [2]bool{ true, false };
+ const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
var res2 = @shuffle(bool, x2, v4, mask5);
expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
}
From ef0f3ba905e992556a60f935cbb7cb30cf1f27db Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 18 Sep 2019 16:34:36 -0400
Subject: [PATCH 7/7] relax std.auto_hash requirements regarding vectors
Previously, auto hash tests required vectors of different element types to
hash to different values. Now, such vectors may hash to the same value.
---
std/hash/auto_hash.zig | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/std/hash/auto_hash.zig b/std/hash/auto_hash.zig
index d34fc2719a..8a22788e5c 100644
--- a/std/hash/auto_hash.zig
+++ b/std/hash/auto_hash.zig
@@ -116,7 +116,7 @@ pub fn hash(hasher: var, key: var, comptime strat: HashStrategy) void {
// Otherwise, hash every element.
// TODO remove the copy to an array once field access is done.
const array: [info.len]info.child = key;
- comptime var i: u32 = 0;
+ comptime var i = 0;
inline while (i < info.len) : (i += 1) {
hash(hasher, array[i], strat);
}
@@ -357,10 +357,13 @@ test "testHash union" {
test "testHash vector" {
const a: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
const b: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
- const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
testing.expect(testHash(a) == testHash(a));
testing.expect(testHash(a) != testHash(b));
- testing.expect(testHash(a) != testHash(c));
+
+ const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
+ const d: @Vector(4, u31) = [_]u31{ 1, 2, 3, 5 };
+ testing.expect(testHash(c) == testHash(c));
+ testing.expect(testHash(c) != testHash(d));
}
test "testHash error union" {