Merge branch 'merge-shawnl-simd5'

This is the commit from Shawn's SIMD patchset regarding `@byteSwap`,
plus my fixups.
This commit is contained in:
Andrew Kelley 2019-09-19 01:05:12 -04:00
commit 0048bcbd71
No known key found for this signature in database
GPG Key ID: 7C5F548F728501A9
6 changed files with 161 additions and 55 deletions

View File

@ -6542,12 +6542,21 @@ async fn func(y: *i32) void {
{#header_close#}
{#header_open|@byteSwap#}
<pre>{#syntax#}@byteSwap(comptime T: type, integer: T) T{#endsyntax#}</pre>
<pre>{#syntax#}@byteSwap(comptime T: type, operand: T) T{#endsyntax#}</pre>
<p>{#syntax#}T{#endsyntax#} must be an integer type with bit count evenly divisible by 8.</p>
<p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
<p>
Swaps the byte order of the integer, or of each element if the operand is a vector. This converts a
big endian integer to a little endian integer, and converts a little endian integer to a big endian integer.
</p>
<p>
Note that for the purposes of memory layout with respect to endianness, the integer type should be
related to the number of bytes reported by {#link|@sizeOf#}. This is demonstrated with
{#syntax#}u24{#endsyntax#}. {#syntax#}@sizeOf(u24) == 4{#endsyntax#}, which means that a
{#syntax#}u24{#endsyntax#} stored in memory takes 4 bytes, and those 4 bytes are what are swapped on
a little vs big endian system. On the other hand, if {#syntax#}T{#endsyntax#} is specified to
be {#syntax#}u24{#endsyntax#}, then only 3 bytes are reversed.
</p>
{#header_close#}
{#header_open|@bitReverse#}

View File

@ -1771,6 +1771,7 @@ struct ZigLLVMFnKey {
} overflow_arithmetic;
struct {
uint32_t bit_count;
uint32_t vector_len; // 0 means not a vector
} bswap;
struct {
uint32_t bit_count;

View File

@ -6896,7 +6896,8 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
(uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
case ZigLLVMFnIdBswap:
return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
return (uint32_t)(x.data.bswap.bit_count) * ((uint32_t)3661994335) +
(uint32_t)(x.data.bswap.vector_len) * (((uint32_t)x.id << 5) + 1025);
case ZigLLVMFnIdBitReverse:
return (uint32_t)(x.data.bit_reverse.bit_count) * (uint32_t)2621398431;
case ZigLLVMFnIdOverflowArithmetic:
@ -6919,7 +6920,8 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
case ZigLLVMFnIdPopCount:
return a.data.pop_count.bit_count == b.data.pop_count.bit_count;
case ZigLLVMFnIdBswap:
return a.data.bswap.bit_count == b.data.bswap.bit_count;
return a.data.bswap.bit_count == b.data.bswap.bit_count &&
a.data.bswap.vector_len == b.data.bswap.vector_len;
case ZigLLVMFnIdBitReverse:
return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
case ZigLLVMFnIdFloatOp:

View File

@ -4505,7 +4505,11 @@ static LLVMValueRef ir_render_optional_unwrap_ptr(CodeGen *g, IrExecutable *exec
}
}
static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnId fn_id) {
static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFnId fn_id) {
bool is_vector = expr_type->id == ZigTypeIdVector;
ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
assert(int_type->id == ZigTypeIdInt);
uint32_t vector_len = is_vector ? expr_type->data.vector.len : 0;
ZigLLVMFnKey key = {};
const char *fn_name;
uint32_t n_args;
@ -4529,6 +4533,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
n_args = 1;
key.id = ZigLLVMFnIdBswap;
key.data.bswap.bit_count = (uint32_t)int_type->data.integral.bit_count;
key.data.bswap.vector_len = vector_len;
} else if (fn_id == BuiltinFnIdBitReverse) {
fn_name = "bitreverse";
n_args = 1;
@ -4543,12 +4548,15 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
return existing_entry->value;
char llvm_name[64];
sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
if (is_vector)
sprintf(llvm_name, "llvm.%s.v%" PRIu32 "i%" PRIu32, fn_name, vector_len, int_type->data.integral.bit_count);
else
sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
LLVMTypeRef param_types[] = {
get_llvm_type(g, int_type),
get_llvm_type(g, expr_type),
LLVMInt1Type(),
};
LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, int_type), param_types, n_args, false);
LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, expr_type), param_types, n_args, false);
LLVMValueRef fn_val = LLVMAddFunction(g->module, llvm_name, fn_type);
assert(LLVMGetIntrinsicID(fn_val));
@ -5542,25 +5550,36 @@ static LLVMValueRef ir_render_mul_add(CodeGen *g, IrExecutable *executable, IrIn
static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInstructionBswap *instruction) {
LLVMValueRef op = ir_llvm_value(g, instruction->op);
ZigType *int_type = instruction->base.value.type;
ZigType *expr_type = instruction->base.value.type;
bool is_vector = expr_type->id == ZigTypeIdVector;
ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
assert(int_type->id == ZigTypeIdInt);
if (int_type->data.integral.bit_count % 16 == 0) {
LLVMValueRef fn_val = get_int_builtin_fn(g, instruction->base.value.type, BuiltinFnIdBswap);
LLVMValueRef fn_val = get_int_builtin_fn(g, expr_type, BuiltinFnIdBswap);
return LLVMBuildCall(g->builder, fn_val, &op, 1, "");
}
// Not an even number of bytes, so we zext 1 byte, then bswap, shift right 1 byte, truncate
ZigType *extended_type = get_int_type(g, int_type->data.integral.is_signed,
int_type->data.integral.bit_count + 8);
LLVMValueRef shift_amt = LLVMConstInt(get_llvm_type(g, extended_type), 8, false);
if (is_vector) {
extended_type = get_vector_type(g, expr_type->data.vector.len, extended_type);
LLVMValueRef *values = allocate_nonzero<LLVMValueRef>(expr_type->data.vector.len);
for (uint32_t i = 0; i < expr_type->data.vector.len; i += 1) {
values[i] = shift_amt;
}
shift_amt = LLVMConstVector(values, expr_type->data.vector.len);
free(values);
}
// aabbcc
LLVMValueRef extended = LLVMBuildZExt(g->builder, op, get_llvm_type(g, extended_type), "");
// 00aabbcc
LLVMValueRef fn_val = get_int_builtin_fn(g, extended_type, BuiltinFnIdBswap);
LLVMValueRef swapped = LLVMBuildCall(g->builder, fn_val, &extended, 1, "");
// ccbbaa00
LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped,
LLVMConstInt(get_llvm_type(g, extended_type), 8, false), "");
LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped, shift_amt, "");
// 00ccbbaa
return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, int_type), "");
return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, expr_type), "");
}
static LLVMValueRef ir_render_bit_reverse(CodeGen *g, IrExecutable *executable, IrInstructionBitReverse *instruction) {
@ -5581,7 +5600,7 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
ZigType *elem_type = array_type->data.array.child_type;
bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
if (bitcast_ok) {
LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
@ -5615,7 +5634,7 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
LLVMTypeRef vector_type_ref = get_llvm_type(g, vector_type);
ZigType *elem_type = vector_type->data.vector.elem_type;
bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
if (bitcast_ok) {
LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
LLVMPointerType(vector_type_ref, 0), "");
@ -8888,7 +8907,7 @@ void add_cc_args(CodeGen *g, ZigList<const char *> &args, const char *out_dep_pa
args.append(g->framework_dirs.at(i));
}
//note(dimenus): appending libc headers before c_headers breaks intrinsics
//note(dimenus): appending libc headers before c_headers breaks intrinsics
//and other compiler specific items
// According to Rich Felker libc headers are supposed to go before C language headers.
args.append("-isystem");

View File

@ -11068,8 +11068,15 @@ static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
return ira->codegen->builtin_types.entry_invalid;
if (ty->id != ZigTypeIdInt) {
ir_add_error(ira, type_value,
ErrorMsg *msg = ir_add_error(ira, type_value,
buf_sprintf("expected integer type, found '%s'", buf_ptr(&ty->name)));
if (ty->id == ZigTypeIdVector &&
ty->data.vector.elem_type->id == ZigTypeIdInt)
{
add_error_note(ira->codegen, msg, type_value->source_node,
buf_sprintf("represent vectors with their element types, i.e. '%s'",
buf_ptr(&ty->data.vector.elem_type->name)));
}
return ira->codegen->builtin_types.entry_invalid;
}
@ -25253,21 +25260,35 @@ static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstruct
}
static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstructionBswap *instruction) {
Error err;
ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
if (type_is_invalid(int_type))
return ira->codegen->invalid_instruction;
IrInstruction *op = ir_implicit_cast(ira, instruction->op->child, int_type);
IrInstruction *uncasted_op = instruction->op->child;
if (type_is_invalid(uncasted_op->value.type))
return ira->codegen->invalid_instruction;
uint32_t vector_len; // UINT32_MAX means not a vector
if (uncasted_op->value.type->id == ZigTypeIdArray &&
is_valid_vector_elem_type(uncasted_op->value.type->data.array.child_type))
{
vector_len = uncasted_op->value.type->data.array.len;
} else if (uncasted_op->value.type->id == ZigTypeIdVector) {
vector_len = uncasted_op->value.type->data.vector.len;
} else {
vector_len = UINT32_MAX;
}
bool is_vector = (vector_len != UINT32_MAX);
ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
IrInstruction *op = ir_implicit_cast(ira, uncasted_op, op_type);
if (type_is_invalid(op->value.type))
return ira->codegen->invalid_instruction;
if (int_type->data.integral.bit_count == 0) {
IrInstruction *result = ir_const(ira, &instruction->base, int_type);
bigint_init_unsigned(&result->value.data.x_bigint, 0);
return result;
}
if (int_type->data.integral.bit_count == 8)
if (int_type->data.integral.bit_count == 8 || int_type->data.integral.bit_count == 0)
return op;
if (int_type->data.integral.bit_count % 8 != 0) {
@ -25282,20 +25303,44 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
if (val == nullptr)
return ira->codegen->invalid_instruction;
if (val->special == ConstValSpecialUndef)
return ir_const_undef(ira, &instruction->base, int_type);
return ir_const_undef(ira, &instruction->base, op_type);
IrInstruction *result = ir_const(ira, &instruction->base, int_type);
IrInstruction *result = ir_const(ira, &instruction->base, op_type);
size_t buf_size = int_type->data.integral.bit_count / 8;
uint8_t *buf = allocate_nonzero<uint8_t>(buf_size);
bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
int_type->data.integral.is_signed);
if (is_vector) {
expand_undef_array(ira->codegen, val);
result->value.data.x_array.data.s_none.elements = create_const_vals(op_type->data.vector.len);
for (unsigned i = 0; i < op_type->data.vector.len; i += 1) {
ConstExprValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
op_elem_val, UndefOk)))
{
return ira->codegen->invalid_instruction;
}
ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
result_elem_val->type = int_type;
result_elem_val->special = op_elem_val->special;
if (op_elem_val->special == ConstValSpecialUndef)
continue;
bigint_write_twos_complement(&op_elem_val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
bigint_read_twos_complement(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint,
buf, int_type->data.integral.bit_count, false,
int_type->data.integral.is_signed);
}
} else {
bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
int_type->data.integral.is_signed);
}
free(buf);
return result;
}
IrInstruction *result = ir_build_bswap(&ira->new_irb, instruction->base.scope,
instruction->base.source_node, nullptr, op);
result->value.type = int_type;
result->value.type = op_type;
return result;
}

View File

@ -1,32 +1,62 @@
const std = @import("std");
const expect = std.testing.expect;
test "@byteSwap" {
comptime testByteSwap();
testByteSwap();
test "@byteSwap integers" {
const ByteSwapIntTest = struct {
fn run() void {
t(u0, 0, 0);
t(u8, 0x12, 0x12);
t(u16, 0x1234, 0x3412);
t(u24, 0x123456, 0x563412);
t(u32, 0x12345678, 0x78563412);
t(u40, 0x123456789a, 0x9a78563412);
t(i48, 0x123456789abc, @bitCast(i48, u48(0xbc9a78563412)));
t(u56, 0x123456789abcde, 0xdebc9a78563412);
t(u64, 0x123456789abcdef1, 0xf1debc9a78563412);
t(u128, 0x123456789abcdef11121314151617181, 0x8171615141312111f1debc9a78563412);
t(u0, u0(0), 0);
t(i8, i8(-50), -50);
t(i16, @bitCast(i16, u16(0x1234)), @bitCast(i16, u16(0x3412)));
t(i24, @bitCast(i24, u24(0x123456)), @bitCast(i24, u24(0x563412)));
t(i32, @bitCast(i32, u32(0x12345678)), @bitCast(i32, u32(0x78563412)));
t(u40, @bitCast(i40, u40(0x123456789a)), u40(0x9a78563412));
t(i48, @bitCast(i48, u48(0x123456789abc)), @bitCast(i48, u48(0xbc9a78563412)));
t(i56, @bitCast(i56, u56(0x123456789abcde)), @bitCast(i56, u56(0xdebc9a78563412)));
t(i64, @bitCast(i64, u64(0x123456789abcdef1)), @bitCast(i64, u64(0xf1debc9a78563412)));
t(
i128,
@bitCast(i128, u128(0x123456789abcdef11121314151617181)),
@bitCast(i128, u128(0x8171615141312111f1debc9a78563412)),
);
}
fn t(comptime I: type, input: I, expected_output: I) void {
std.testing.expectEqual(expected_output, @byteSwap(I, input));
}
};
comptime ByteSwapIntTest.run();
ByteSwapIntTest.run();
}
fn testByteSwap() void {
expect(@byteSwap(u0, 0) == 0);
expect(@byteSwap(u8, 0x12) == 0x12);
expect(@byteSwap(u16, 0x1234) == 0x3412);
expect(@byteSwap(u24, 0x123456) == 0x563412);
expect(@byteSwap(u32, 0x12345678) == 0x78563412);
expect(@byteSwap(u40, 0x123456789a) == 0x9a78563412);
expect(@byteSwap(i48, 0x123456789abc) == @bitCast(i48, u48(0xbc9a78563412)));
expect(@byteSwap(u56, 0x123456789abcde) == 0xdebc9a78563412);
expect(@byteSwap(u64, 0x123456789abcdef1) == 0xf1debc9a78563412);
expect(@byteSwap(u128, 0x123456789abcdef11121314151617181) == 0x8171615141312111f1debc9a78563412);
test "@byteSwap vectors" {
const ByteSwapVectorTest = struct {
fn run() void {
t(u8, 2, [_]u8{ 0x12, 0x13 }, [_]u8{ 0x12, 0x13 });
t(u16, 2, [_]u16{ 0x1234, 0x2345 }, [_]u16{ 0x3412, 0x4523 });
t(u24, 2, [_]u24{ 0x123456, 0x234567 }, [_]u24{ 0x563412, 0x674523 });
}
expect(@byteSwap(u0, u0(0)) == 0);
expect(@byteSwap(i8, i8(-50)) == -50);
expect(@byteSwap(i16, @bitCast(i16, u16(0x1234))) == @bitCast(i16, u16(0x3412)));
expect(@byteSwap(i24, @bitCast(i24, u24(0x123456))) == @bitCast(i24, u24(0x563412)));
expect(@byteSwap(i32, @bitCast(i32, u32(0x12345678))) == @bitCast(i32, u32(0x78563412)));
expect(@byteSwap(u40, @bitCast(i40, u40(0x123456789a))) == u40(0x9a78563412));
expect(@byteSwap(i48, @bitCast(i48, u48(0x123456789abc))) == @bitCast(i48, u48(0xbc9a78563412)));
expect(@byteSwap(i56, @bitCast(i56, u56(0x123456789abcde))) == @bitCast(i56, u56(0xdebc9a78563412)));
expect(@byteSwap(i64, @bitCast(i64, u64(0x123456789abcdef1))) == @bitCast(i64, u64(0xf1debc9a78563412)));
expect(@byteSwap(i128, @bitCast(i128, u128(0x123456789abcdef11121314151617181))) ==
@bitCast(i128, u128(0x8171615141312111f1debc9a78563412)));
fn t(
comptime I: type,
comptime n: comptime_int,
input: @Vector(n, I),
expected_vector: @Vector(n, I),
) void {
const actual_output: [n]I = @byteSwap(I, input);
const expected_output: [n]I = expected_vector;
std.testing.expectEqual(expected_output, actual_output);
}
};
comptime ByteSwapVectorTest.run();
ByteSwapVectorTest.run();
}