diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index e74480784e..b2067c4f8f 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -602,9 +602,9 @@ fn testUtf8IteratorOnAscii() !void { const s = Utf8View.initComptime("abc"); var it1 = s.iterator(); - try testing.expect(std.mem.eql(u8, "a", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "b", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "c", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "a", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "b", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "c", it1.nextCodepointSlice().?)); try testing.expect(it1.nextCodepointSlice() == null); var it2 = s.iterator(); @@ -632,9 +632,9 @@ fn testUtf8ViewOk() !void { const s = Utf8View.initComptime("東京市"); var it1 = s.iterator(); - try testing.expect(std.mem.eql(u8, "東", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "京", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "市", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "東", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "京", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "市", it1.nextCodepointSlice().?)); try testing.expect(it1.nextCodepointSlice() == null); var it2 = s.iterator(); @@ -772,20 +772,20 @@ fn testUtf8Peeking() !void { const s = Utf8View.initComptime("noël"); var it = s.iterator(); - try testing.expect(std.mem.eql(u8, "n", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "n", it.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "o", it.peek(1))); - try testing.expect(std.mem.eql(u8, "oë", it.peek(2))); - try testing.expect(std.mem.eql(u8, "oël", it.peek(3))); - try testing.expect(std.mem.eql(u8, "oël", it.peek(4))); - try testing.expect(std.mem.eql(u8, "oël", it.peek(10))); + try testing.expect(mem.eql(u8, "o", it.peek(1))); + try testing.expect(mem.eql(u8, "oë", it.peek(2))); + try testing.expect(mem.eql(u8, "oël", it.peek(3))); + try testing.expect(mem.eql(u8, "oël", it.peek(4))); + try testing.expect(mem.eql(u8, "oël", it.peek(10))); - try testing.expect(std.mem.eql(u8, "o", it.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "ë", it.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "l", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "o", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "ë", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "l", it.nextCodepointSlice().?)); try testing.expect(it.nextCodepointSlice() == null); - try testing.expect(std.mem.eql(u8, &[_]u8{}, it.peek(1))); + try testing.expect(mem.eql(u8, &[_]u8{}, it.peek(1))); } fn testError(bytes: []const u8, expected_err: anyerror) !void { @@ -927,20 +927,16 @@ test "fmtUtf8" { } fn utf16LeToUtf8ArrayListImpl( - array_list: *std.ArrayList(u8), + result: *std.ArrayList(u8), utf16le: []const u16, comptime surrogates: Surrogates, ) (switch (surrogates) { .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError, .can_encode_surrogate_half => mem.Allocator.Error, })!void { - // optimistically guess that it will all be ascii. 
- try array_list.ensureTotalCapacityPrecise(utf16le.len); + assert(result.capacity >= utf16le.len); var remaining = utf16le; - if (builtin.zig_backend != .stage2_x86_64 or - comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and - !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) vectorized: { const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u16); @@ -948,41 +944,33 @@ fn utf16LeToUtf8ArrayListImpl( // Fast path. Check for and encode ASCII characters at the start of the input. while (remaining.len >= chunk_len) { const chunk: Chunk = remaining[0..chunk_len].*; - const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F)); + const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F)); if (@reduce(.Or, chunk | mask != mask)) { // found a non ASCII code unit break; } - const chunk_byte_len = chunk_len * 2; - const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*; - const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes); - const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0]; + const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk)); // We allocated enough space to encode every UTF-16 code unit // as ASCII, so if the entire string is ASCII then we are // guaranteed to have enough space allocated - array_list.appendSliceAssumeCapacity(&ascii_bytes); + result.addManyAsArrayAssumeCapacity(chunk_len).* = ascii_chunk; remaining = remaining[chunk_len..]; } } - var out_index: usize = array_list.items.len; switch (surrogates) { .cannot_encode_surrogate_half => { var it = Utf16LeIterator.init(remaining); while (try it.nextCodepoint()) |codepoint| { const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable; - try array_list.resize(array_list.items.len + utf8_len); - assert((utf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len); - out_index += utf8_len; + assert((utf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len); } }, .can_encode_surrogate_half => { var it = Wtf16LeIterator.init(remaining); while (it.nextCodepoint()) |codepoint| { const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable; - try array_list.resize(array_list.items.len + utf8_len); - assert((wtf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len); - out_index += utf8_len; + assert((wtf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len); } }, } @@ -990,8 +978,9 @@ fn utf16LeToUtf8ArrayListImpl( pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error; -pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void { - return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .cannot_encode_surrogate_half); +pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void { + try result.ensureTotalCapacityPrecise(utf16le.len); + return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half); } /// Deprecated; renamed to utf16LeToUtf8Alloc @@ -1003,8 +992,7 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16L var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len); errdefer result.deinit(); - try utf16LeToUtf8ArrayList(&result, utf16le); - + try utf16LeToUtf8ArrayListImpl(&result, utf16le, 
.cannot_encode_surrogate_half); return result.toOwnedSlice(); } @@ -1017,8 +1005,7 @@ pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16 var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1); errdefer result.deinit(); - try utf16LeToUtf8ArrayList(&result, utf16le); - + try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half); return result.toOwnedSliceSentinel(0); } @@ -1030,12 +1017,9 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr .cannot_encode_surrogate_half => Utf16LeToUtf8Error, .can_encode_surrogate_half => error{}, })!usize { - var end_index: usize = 0; + var dest_index: usize = 0; var remaining = utf16le; - if (builtin.zig_backend != .stage2_x86_64 or - comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and - !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) vectorized: { const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u16); @@ -1043,17 +1027,14 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr // Fast path. Check for and encode ASCII characters at the start of the input. while (remaining.len >= chunk_len) { const chunk: Chunk = remaining[0..chunk_len].*; - const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F)); + const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F)); if (@reduce(.Or, chunk | mask != mask)) { // found a non ASCII code unit break; } - const chunk_byte_len = chunk_len * 2; - const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*; - const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes); - const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0]; - @memcpy(utf8[end_index .. end_index + chunk_len], &ascii_bytes); - end_index += chunk_len; + const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk)); + utf8[dest_index..][0..chunk_len].* = ascii_chunk; + dest_index += chunk_len; remaining = remaining[chunk_len..]; } } @@ -1062,7 +1043,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr .cannot_encode_surrogate_half => { var it = Utf16LeIterator.init(remaining); while (try it.nextCodepoint()) |codepoint| { - end_index += utf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) { + dest_index += utf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) { // The maximum possible codepoint encoded by UTF-16 is U+10FFFF, // which is within the valid codepoint range. error.CodepointTooLarge => unreachable, @@ -1075,7 +1056,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr .can_encode_surrogate_half => { var it = Wtf16LeIterator.init(remaining); while (it.nextCodepoint()) |codepoint| { - end_index += wtf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) { + dest_index += wtf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) { // The maximum possible codepoint encoded by UTF-16 is U+10FFFF, // which is within the valid codepoint range. 
error.CodepointTooLarge => unreachable, @@ -1083,7 +1064,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr } }, } - return end_index; + return dest_index; } /// Deprecated; renamed to utf16LeToUtf8 @@ -1156,18 +1137,12 @@ test utf16LeToUtf8 { } } -fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void { - // optimistically guess that it will not require surrogate pairs - try array_list.ensureTotalCapacityPrecise(utf8.len); +fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void { + assert(result.capacity >= utf8.len); var remaining = utf8; - // Need support for std.simd.interlace - if ((builtin.zig_backend != .stage2_x86_64 or - comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and - !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and - comptime !builtin.cpu.arch.isMIPS()) vectorized: { - const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2); + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u8); // Fast path. Check for and encode ASCII characters at the start of the input. @@ -1178,9 +1153,8 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, // found a non ASCII code unit break; } - const zeroes: Chunk = @splat(0); - const utf16_chunk: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes }); - array_list.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk)); + const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk); + result.addManyAsArrayAssumeCapacity(chunk_len).* = utf16_chunk; remaining = remaining[chunk_len..]; } } @@ -1192,21 +1166,18 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, var it = view.iterator(); while (it.nextCodepoint()) |codepoint| { if (codepoint < 0x10000) { - const short = @as(u16, @intCast(codepoint)); - try array_list.append(mem.nativeToLittle(u16, short)); + try result.append(mem.nativeToLittle(u16, @intCast(codepoint))); } else { const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800; const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00; - var out: [2]u16 = undefined; - out[0] = mem.nativeToLittle(u16, high); - out[1] = mem.nativeToLittle(u16, low); - try array_list.appendSlice(out[0..]); + try result.appendSlice(&.{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) }); } } } -pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void { - return utf8ToUtf16LeArrayListImpl(array_list, utf8, .cannot_encode_surrogate_half); +pub fn utf8ToUtf16LeArrayList(result: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void { + try result.ensureTotalCapacityPrecise(utf8.len); + return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half); } pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 { @@ -1215,7 +1186,6 @@ pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ Inv errdefer result.deinit(); try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half); - return result.toOwnedSlice(); } @@ -1228,7 +1198,6 @@ pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ In errdefer result.deinit(); 
try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half); - return result.toOwnedSliceSentinel(0); } @@ -1239,16 +1208,11 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize } pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize { - var dest_i: usize = 0; + var dest_index: usize = 0; var remaining = utf8; - // Need support for std.simd.interlace - if ((builtin.zig_backend != .stage2_x86_64 or - comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and - !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and - comptime !builtin.cpu.arch.isMIPS()) vectorized: { - const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2); + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u8); // Fast path. Check for and encode ASCII characters at the start of the input. @@ -1259,57 +1223,60 @@ pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: // found a non ASCII code unit break; } - const zeroes: Chunk = @splat(0); - const utf16_bytes: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes }); - @memcpy(utf16le[dest_i..][0..chunk_len], std.mem.bytesAsSlice(u16, &utf16_bytes)); - dest_i += chunk_len; + const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk); + utf16le[dest_index..][0..chunk_len].* = utf16_chunk; + dest_index += chunk_len; remaining = remaining[chunk_len..]; } } - var src_i: usize = 0; - while (src_i < remaining.len) { - const n = utf8ByteSequenceLength(remaining[src_i]) catch return switch (surrogates) { - .cannot_encode_surrogate_half => error.InvalidUtf8, - .can_encode_surrogate_half => error.InvalidWtf8, - }; - const next_src_i = src_i + n; - const codepoint = switch (surrogates) { - .cannot_encode_surrogate_half => utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8, - .can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidWtf8, - }; + const view = switch (surrogates) { + .cannot_encode_surrogate_half => try Utf8View.init(remaining), + .can_encode_surrogate_half => try Wtf8View.init(remaining), + }; + var it = view.iterator(); + while (it.nextCodepoint()) |codepoint| { if (codepoint < 0x10000) { - const short = @as(u16, @intCast(codepoint)); - utf16le[dest_i] = mem.nativeToLittle(u16, short); - dest_i += 1; + utf16le[dest_index] = mem.nativeToLittle(u16, @intCast(codepoint)); + dest_index += 1; } else { const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800; const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00; - utf16le[dest_i] = mem.nativeToLittle(u16, high); - utf16le[dest_i + 1] = mem.nativeToLittle(u16, low); - dest_i += 2; + utf16le[dest_index..][0..2].* = .{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) }; + dest_index += 2; } - src_i = next_src_i; } - return dest_i; + return dest_index; } test "utf8ToUtf16Le" { - var utf16le: [2]u16 = [_]u16{0} ** 2; + var utf16le: [128]u16 = undefined; { const length = try utf8ToUtf16Le(utf16le[0..], "𐐷"); - try testing.expectEqual(@as(usize, 2), length); - try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..])); + try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..length])); } { const length = try utf8ToUtf16Le(utf16le[0..], "\u{10FFFF}"); - try testing.expectEqual(@as(usize, 2), length); 
-        try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..]));
+        try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..length]));
     }
     {
         const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80");
         try testing.expectError(error.InvalidUtf8, result);
     }
+    {
+        const length = try utf8ToUtf16Le(utf16le[0..], "This string has been designed to test the vectorized implementat" ++
+            "ion by beginning with one hundred twenty-seven ASCII characters¡");
+        try testing.expectEqualSlices(u8, &.{
+            'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0,
+            'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0,
+            ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0,
+            'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0,
+            'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0,
+            ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0,
+            'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0,
+            'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0,
+        }, mem.sliceAsBytes(utf16le[0..length]));
+    }
 }
 
 test utf8ToUtf16LeArrayList {
@@ -1354,25 +1321,40 @@ test utf8ToUtf16LeAllocZ {
     {
         const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "𐐷");
         defer testing.allocator.free(utf16);
-        try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));
+        try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16));
         try testing.expect(utf16[2] == 0);
     }
     {
         const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "\u{10FFFF}");
         defer testing.allocator.free(utf16);
-        try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));
+        try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16));
        try testing.expect(utf16[2] == 0);
     }
     {
         const result = utf8ToUtf16LeAllocZ(testing.allocator, "\xf4\x90\x80\x80");
         try testing.expectError(error.InvalidUtf8, result);
     }
+    {
+        const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "This string has been designed to test the vectorized implementat" ++
+            "ion by beginning with one hundred twenty-seven ASCII characters¡");
+        defer testing.allocator.free(utf16);
+        try testing.expectEqualSlices(u8, &.{
+            'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0,
+            'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0,
+            ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0,
+            'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0,
+            'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0,
+            ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0,
+            'd', 0, ' ', 0, 't',
0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0, + 'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0, + }, mem.sliceAsBytes(utf16)); + } } /// Converts a UTF-8 string literal into a UTF-16LE string literal. -pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch unreachable:0]u16 { +pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch |err| @compileError(err):0]u16 { return comptime blk: { - const len: usize = calcUtf16LeLen(utf8) catch |err| @compileError(err); + const len: usize = calcUtf16LeLen(utf8) catch unreachable; var utf16le: [len:0]u16 = [_:0]u16{0} ** len; const utf16le_len = utf8ToUtf16Le(&utf16le, utf8[0..]) catch |err| @compileError(err); assert(len == utf16le_len); @@ -1453,12 +1435,12 @@ test "fmtUtf16Le" { try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))}); try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))}); try expectFmt("𐐷", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("𐐷"))}); - try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})}); - try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})}); + try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})}); + try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})}); } test "utf8ToUtf16LeStringLiteral" { @@ -1701,8 +1683,9 @@ pub const Wtf8Iterator = struct { } }; -pub fn wtf16LeToWtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void { - return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .can_encode_surrogate_half); +pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void { + try result.ensureTotalCapacityPrecise(utf16le.len); + return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half); } /// Caller must free returned memory. 
@@ -1711,8 +1694,7 @@ pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Al var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len); errdefer result.deinit(); - try wtf16LeToWtf8ArrayList(&result, wtf16le); - + try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half); return result.toOwnedSlice(); } @@ -1722,8 +1704,7 @@ pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.A var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1); errdefer result.deinit(); - try wtf16LeToWtf8ArrayList(&result, wtf16le); - + try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half); return result.toOwnedSliceSentinel(0); } @@ -1731,8 +1712,9 @@ pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize { return utf16LeToUtf8Impl(wtf8, wtf16le, .can_encode_surrogate_half) catch |err| switch (err) {}; } -pub fn wtf8ToWtf16LeArrayList(array_list: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void { - return utf8ToUtf16LeArrayListImpl(array_list, wtf8, .can_encode_surrogate_half); +pub fn wtf8ToWtf16LeArrayList(result: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void { + try result.ensureTotalCapacityPrecise(wtf8.len); + return utf8ToUtf16LeArrayListImpl(result, wtf8, .can_encode_surrogate_half); } pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 { @@ -1741,7 +1723,6 @@ pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ Inv errdefer result.deinit(); try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half); - return result.toOwnedSlice(); } @@ -1751,7 +1732,6 @@ pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ In errdefer result.deinit(); try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half); - return result.toOwnedSliceSentinel(0); } @@ -1910,7 +1890,7 @@ pub const Wtf16LeIterator = struct { pub fn init(s: []const u16) Wtf16LeIterator { return Wtf16LeIterator{ - .bytes = std.mem.sliceAsBytes(s), + .bytes = mem.sliceAsBytes(s), .i = 0, }; } @@ -1923,12 +1903,12 @@ pub const Wtf16LeIterator = struct { assert(it.i <= it.bytes.len); if (it.i == it.bytes.len) return null; var code_units: [2]u16 = undefined; - code_units[0] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little); + code_units[0] = mem.readInt(u16, it.bytes[it.i..][0..2], .little); it.i += 2; surrogate_pair: { if (utf16IsHighSurrogate(code_units[0])) { if (it.i >= it.bytes.len) break :surrogate_pair; - code_units[1] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little); + code_units[1] = mem.readInt(u16, it.bytes[it.i..][0..2], .little); const codepoint = utf16DecodeSurrogatePair(&code_units) catch break :surrogate_pair; it.i += 2; return codepoint; @@ -2045,31 +2025,31 @@ fn testRoundtripWtf16(wtf16le: []const u16) !void { test "well-formed WTF-16 roundtrips" { try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD83D), // high surrogate - std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate + mem.nativeToLittle(u16, 0xD83D), // high surrogate + mem.nativeToLittle(u16, 0xDCA9), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD83D), // high surrogate - std.mem.nativeToLittle(u16, ' '), // not surrogate - std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate + mem.nativeToLittle(u16, 0xD83D), // high surrogate + mem.nativeToLittle(u16, ' '), // not surrogate + 
mem.nativeToLittle(u16, 0xDCA9), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD800), // high surrogate - std.mem.nativeToLittle(u16, 0xDBFF), // high surrogate + mem.nativeToLittle(u16, 0xD800), // high surrogate + mem.nativeToLittle(u16, 0xDBFF), // high surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD800), // high surrogate - std.mem.nativeToLittle(u16, 0xE000), // not surrogate + mem.nativeToLittle(u16, 0xD800), // high surrogate + mem.nativeToLittle(u16, 0xE000), // not surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD7FF), // not surrogate - std.mem.nativeToLittle(u16, 0xDC00), // low surrogate + mem.nativeToLittle(u16, 0xD7FF), // not surrogate + mem.nativeToLittle(u16, 0xDC00), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0x61), // not surrogate - std.mem.nativeToLittle(u16, 0xDC00), // low surrogate + mem.nativeToLittle(u16, 0x61), // not surrogate + mem.nativeToLittle(u16, 0xDC00), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xDC00), // low surrogate + mem.nativeToLittle(u16, 0xDC00), // low surrogate }); } diff --git a/src/InternPool.zig b/src/InternPool.zig index af1eb241a5..374acd0e67 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -7404,10 +7404,14 @@ pub fn isIntegerType(ip: *const InternPool, ty: Index) bool { .c_ulong_type, .c_longlong_type, .c_ulonglong_type, - .c_longdouble_type, .comptime_int_type, => true, - else => ip.indexToKey(ty) == .int_type, + else => switch (ip.items.items(.tag)[@intFromEnum(ty)]) { + .type_int_signed, + .type_int_unsigned, + => true, + else => false, + }, }; } diff --git a/src/Sema.zig b/src/Sema.zig index 0a38cf93dc..e1a9f7ee65 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -23328,7 +23328,8 @@ fn checkVectorElemType( const mod = sema.mod; switch (ty.zigTypeTag(mod)) { .Int, .Float, .Bool => return, - else => if (ty.isPtrAtRuntime(mod)) return, + .Optional, .Pointer => if (ty.isPtrAtRuntime(mod)) return, + else => {}, } return sema.fail(block, ty_src, "expected integer, float, bool, or pointer for the vector element type; found '{}'", .{ty.fmt(mod)}); } @@ -28455,7 +28456,7 @@ const CoerceOpts = struct { report_err: bool = true, /// Ignored if `report_err == false`. is_ret: bool = false, - /// Should coercion to comptime_int ermit an error message. + /// Should coercion to comptime_int emit an error message. 
no_cast_to_comptime_int: bool = false, param_src: struct { @@ -31858,6 +31859,34 @@ fn coerceArrayLike( } const dest_elem_ty = dest_ty.childType(mod); + if (dest_ty.isVector(mod) and inst_ty.isVector(mod) and (try sema.resolveValue(inst)) == null) { + const inst_elem_ty = inst_ty.childType(mod); + switch (dest_elem_ty.zigTypeTag(mod)) { + .Int => if (inst_elem_ty.isInt(mod)) { + // integer widening + const dst_info = dest_elem_ty.intInfo(mod); + const src_info = inst_elem_ty.intInfo(mod); + if ((src_info.signedness == dst_info.signedness and dst_info.bits >= src_info.bits) or + // small enough unsigned ints can get casted to large enough signed ints + (dst_info.signedness == .signed and dst_info.bits > src_info.bits)) + { + try sema.requireRuntimeBlock(block, inst_src, null); + return block.addTyOp(.intcast, dest_ty, inst); + } + }, + .Float => if (inst_elem_ty.isRuntimeFloat()) { + // float widening + const src_bits = inst_elem_ty.floatBits(target); + const dst_bits = dest_elem_ty.floatBits(target); + if (dst_bits >= src_bits) { + try sema.requireRuntimeBlock(block, inst_src, null); + return block.addTyOp(.fpext, dest_ty, inst); + } + }, + else => {}, + } + } + const element_vals = try sema.arena.alloc(InternPool.Index, dest_len); const element_refs = try sema.arena.alloc(Air.Inst.Ref, dest_len); var runtime_src: ?LazySrcLoc = null; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index a69269239f..ffc81988fc 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2853,11 +2853,14 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { } fn airFpext(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.comp.module.?; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const dst_ty = self.typeOfIndex(inst); - const dst_bits = dst_ty.floatBits(self.target.*); + const dst_scalar_ty = dst_ty.scalarType(mod); + const dst_bits = dst_scalar_ty.floatBits(self.target.*); const src_ty = self.typeOf(ty_op.operand); - const src_bits = src_ty.floatBits(self.target.*); + const src_scalar_ty = src_ty.scalarType(mod); + const src_bits = src_scalar_ty.floatBits(self.target.*); const result = result: { if (switch (src_bits) { @@ -2881,94 +2884,290 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { }, else => unreachable, }) { + if (dst_ty.isVector(mod)) break :result null; var callee_buf: ["__extend?f?f2".len]u8 = undefined; break :result try self.genCall(.{ .lib = .{ - .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(), - .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()}, + .return_type = self.floatCompilerRtAbiType(dst_scalar_ty, src_scalar_ty).toIntern(), + .param_types = &.{self.floatCompilerRtAbiType(src_scalar_ty, dst_scalar_ty).toIntern()}, .callee = std.fmt.bufPrint(&callee_buf, "__extend{c}f{c}f2", .{ floatCompilerRtAbiName(src_bits), floatCompilerRtAbiName(dst_bits), }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}); + } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}); } + const src_abi_size: u32 = @intCast(src_ty.abiSize(mod)); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_reg = dst_mcv.getReg().?.to128(); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, @intCast(@max(dst_ty.abiSize(mod), 16))); const 
dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const vec_len = if (dst_ty.isVector(mod)) dst_ty.vectorLen(mod) else 1; if (src_bits == 16) { assert(self.hasFeature(.f16c)); const mat_src_reg = if (src_mcv.isRegister()) src_mcv.getReg().? else try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister( + .{ .v_ps, .cvtph2 }, + dst_alias, + registerAlias(mat_src_reg, src_abi_size), + ); switch (dst_bits) { 32 => {}, 64 => try self.asmRegisterRegisterRegister( .{ .v_sd, .cvtss2 }, - dst_reg, - dst_reg, - dst_reg, + dst_alias, + dst_alias, + dst_alias, ), else => unreachable, } } else { assert(src_bits == 32 and dst_bits == 64); - if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ .v_sd, .cvtss2 }, - dst_reg, - dst_reg, - try src_mcv.mem(self, .dword), - ) else try self.asmRegisterRegisterRegister( - .{ .v_sd, .cvtss2 }, - dst_reg, - dst_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv)).to128(), - ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._sd, .cvtss2 }, - dst_reg, - try src_mcv.mem(self, .dword), + if (self.hasFeature(.avx)) switch (vec_len) { + 1 => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .{ .v_sd, .cvtss2 }, + dst_alias, + dst_alias, + try src_mcv.mem(self, self.memSize(src_ty)), + ) else try self.asmRegisterRegisterRegister( + .{ .v_sd, .cvtss2 }, + dst_alias, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), + ), + 2...4 => if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_pd, .cvtps2 }, + dst_alias, + try src_mcv.mem(self, self.memSize(src_ty)), + ) else try self.asmRegisterRegister( + .{ .v_pd, .cvtps2 }, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), + ), + else => break :result null, + } else if (src_mcv.isMemory()) try self.asmRegisterMemory( + switch (vec_len) { + 1 => .{ ._sd, .cvtss2 }, + 2 => .{ ._pd, .cvtps2 }, + else => break :result null, + }, + dst_alias, + try src_mcv.mem(self, self.memSize(src_ty)), ) else try self.asmRegisterRegister( - .{ ._sd, .cvtss2 }, - dst_reg, - (if (src_mcv.isRegister()) + switch (vec_len) { + 1 => .{ ._sd, .cvtss2 }, + 2 => .{ ._pd, .cvtps2 }, + else => break :result null, + }, + dst_alias, + registerAlias(if (src_mcv.isRegister()) src_mcv.getReg().? 
else - try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), ); } break :result dst_mcv; - }; + } orelse return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(mod), dst_ty.fmt(mod), + }); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { const mod = self.bin_file.comp.module.?; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result: MCValue = result: { - const src_ty = self.typeOf(ty_op.operand); + const src_ty = self.typeOf(ty_op.operand); + const dst_ty = self.typeOfIndex(inst); + + const result = @as(?MCValue, result: { + const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod)); + const src_int_info = src_ty.intInfo(mod); - - const dst_ty = self.typeOfIndex(inst); const dst_int_info = dst_ty.intInfo(mod); - const abi_size: u32 = @intCast(dst_ty.abiSize(mod)); - - const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty; const extend = switch (src_int_info.signedness) { .signed => dst_int_info, .unsigned => src_int_info, }.signedness; const src_mcv = try self.resolveInst(ty_op.operand); + if (dst_ty.isVector(mod)) { + const src_abi_size: u32 = @intCast(src_ty.abiSize(mod)); + const max_abi_size = @max(dst_abi_size, src_abi_size); + if (max_abi_size > @as(u32, if (self.hasFeature(.avx2)) 32 else 16)) break :result null; + const has_avx = self.hasFeature(.avx); + + const dst_elem_abi_size = dst_ty.childType(mod).abiSize(mod); + const src_elem_abi_size = src_ty.childType(mod).abiSize(mod); + switch (math.order(dst_elem_abi_size, src_elem_abi_size)) { + .lt => { + const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) { + else => break :result null, + 1 => switch (src_elem_abi_size) { + else => break :result null, + 2 => switch (dst_int_info.signedness) { + .signed => if (has_avx) .{ .vp_b, .ackssw } else .{ .p_b, .ackssw }, + .unsigned => if (has_avx) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw }, + }, + }, + 2 => switch (src_elem_abi_size) { + else => break :result null, + 4 => switch (dst_int_info.signedness) { + .signed => if (has_avx) .{ .vp_w, .ackssd } else .{ .p_w, .ackssd }, + .unsigned => if (has_avx) + .{ .vp_w, .ackusd } + else if (self.hasFeature(.sse4_1)) + .{ .p_w, .ackusd } + else + break :result null, + }, + }, + }; + + const dst_mcv: MCValue = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else if (has_avx and src_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, dst_abi_size); + + if (has_avx) try self.asmRegisterRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+                        else
+                            dst_reg, src_abi_size),
+                        dst_alias,
+                    ) else try self.asmRegisterRegister(
+                        mir_tag,
+                        dst_alias,
+                        dst_alias,
+                    );
+                    break :result dst_mcv;
+                },
+                .eq => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+                    break :result src_mcv
+                else {
+                    const dst_mcv = try self.allocRegOrMem(inst, true);
+                    try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
+                    break :result dst_mcv;
+                },
+                .gt => if (self.hasFeature(.sse4_1)) {
+                    const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) {
+                        else => break :result null,
+                        2 => if (has_avx) .vp_w else .p_w,
+                        4 => if (has_avx) .vp_d else .p_d,
+                        8 => if (has_avx) .vp_q else .p_q,
+                    }, switch (src_elem_abi_size) {
+                        else => break :result null,
+                        1 => switch (extend) {
+                            .signed => .movsxb,
+                            .unsigned => .movzxb,
+                        },
+                        2 => switch (extend) {
+                            .signed => .movsxw,
+                            .unsigned => .movzxw,
+                        },
+                        4 => switch (extend) {
+                            .signed => .movsxd,
+                            .unsigned => .movzxd,
+                        },
+                    } };
+
+                    const dst_mcv: MCValue = if (src_mcv.isRegister() and
+                        self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+                        src_mcv
+                    else
+                        .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) };
+                    const dst_reg = dst_mcv.getReg().?;
+                    const dst_alias = registerAlias(dst_reg, dst_abi_size);
+
+                    if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                        mir_tag,
+                        dst_alias,
+                        try src_mcv.mem(self, self.memSize(src_ty)),
+                    ) else try self.asmRegisterRegister(
+                        mir_tag,
+                        dst_alias,
+                        registerAlias(if (src_mcv.isRegister())
+                            src_mcv.getReg().?
+                        else
+                            try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
+                    );
+                    break :result dst_mcv;
+                } else {
+                    const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
+                        else => break :result null,
+                        2 => switch (src_elem_abi_size) {
+                            else => break :result null,
+                            1 => .{ .p_, .unpcklbw },
+                        },
+                        4 => switch (src_elem_abi_size) {
+                            else => break :result null,
+                            2 => .{ .p_, .unpcklwd },
+                        },
+                        8 => switch (src_elem_abi_size) {
+                            else => break :result null,
+                            4 => .{ .p_, .unpckldq },
+                        },
+                    };
+
+                    const dst_mcv: MCValue = if (src_mcv.isRegister() and
+                        self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+                        src_mcv
+                    else
+                        try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
+                    const dst_reg = dst_mcv.getReg().?;
+
+                    const ext_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
+                    const ext_alias = registerAlias(ext_reg, src_abi_size);
+                    const ext_lock = self.register_manager.lockRegAssumeUnused(ext_reg);
+                    defer self.register_manager.unlockReg(ext_lock);
+
+                    try self.asmRegisterRegister(.{ .p_, .xor }, ext_alias, ext_alias);
+                    switch (extend) {
+                        .signed => try self.asmRegisterRegister(
+                            .{ switch (src_elem_abi_size) {
+                                else => unreachable,
+                                1 => .p_b,
+                                2 => .p_w,
+                                4 => .p_d,
+                            }, .cmpgt },
+                            ext_alias,
+                            registerAlias(dst_reg, src_abi_size),
+                        ),
+                        .unsigned => {},
+                    }
+                    try self.asmRegisterRegister(
+                        mir_tag,
+                        registerAlias(dst_reg, dst_abi_size),
+                        registerAlias(ext_reg, dst_abi_size),
+                    );
+                    break :result dst_mcv;
+                },
+            }
+            @compileError("unreachable");
+        }
+
+        const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
+
         const src_storage_bits: u16 = switch (src_mcv) {
             .register, .register_offset => 64,
             .register_pair => 128,
@@ -2986,13 +3185,13 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
         };
 
         if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister())
-            .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }
+            .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }
        else
            dst_mcv;

        if
(dst_mcv.isRegister()) { try self.truncateRegister(src_ty, dst_mcv.getReg().?); - break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }; + break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }; } const src_limbs_len = math.divCeil(u16, src_int_info.bits, 64) catch unreachable; @@ -3040,7 +3239,9 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { ); break :result dst_mcv; - }; + }) orelse return self.fail("TODO implement airIntCast from {} to {}", .{ + src_ty.fmt(mod), dst_ty.fmt(mod), + }); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -3063,7 +3264,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { src_mcv else if (dst_abi_size <= 8) try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv) - else if (dst_abi_size <= 16) dst: { + else if (dst_abi_size <= 16 and !dst_ty.isVector(mod)) dst: { const dst_regs = try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp); const dst_mcv: MCValue = .{ .register_pair = dst_regs }; @@ -3080,19 +3281,22 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { if (dst_ty.zigTypeTag(mod) == .Vector) { assert(src_ty.zigTypeTag(mod) == .Vector and dst_ty.vectorLen(mod) == src_ty.vectorLen(mod)); - const dst_info = dst_ty.childType(mod).intInfo(mod); - const src_info = src_ty.childType(mod).intInfo(mod); - const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_info.bits) { - 8 => switch (src_info.bits) { - 16 => switch (dst_ty.vectorLen(mod)) { + const dst_elem_ty = dst_ty.childType(mod); + const dst_elem_abi_size: u32 = @intCast(dst_elem_ty.abiSize(mod)); + const src_elem_ty = src_ty.childType(mod); + const src_elem_abi_size: u32 = @intCast(src_elem_ty.abiSize(mod)); + + const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_elem_abi_size) { + 1 => switch (src_elem_abi_size) { + 2 => switch (dst_ty.vectorLen(mod)) { 1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw }, 9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null, else => null, }, else => null, }, - 16 => switch (src_info.bits) { - 32 => switch (dst_ty.vectorLen(mod)) { + 2 => switch (src_elem_abi_size) { + 4 => switch (dst_ty.vectorLen(mod)) { 1...4 => if (self.hasFeature(.avx)) .{ .vp_w, .ackusd } else if (self.hasFeature(.sse4_1)) @@ -3107,12 +3311,14 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { else => null, }) orelse return self.fail("TODO implement airTrunc for {}", .{dst_ty.fmt(mod)}); - const elem_ty = src_ty.childType(mod); - const mask_val = try mod.intValue(elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits)); + const dst_info = dst_elem_ty.intInfo(mod); + const src_info = src_elem_ty.intInfo(mod); + + const mask_val = try mod.intValue(src_elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits)); const splat_ty = try mod.vectorType(.{ .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)), - .child = elem_ty.ip_index, + .child = src_elem_ty.ip_index, }); const splat_abi_size: u32 = @intCast(splat_ty.abiSize(mod)); @@ -4086,7 +4292,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (dst_info.bits > 128 and dst_info.signedness == .unsigned) { const slow_inc = self.hasFeature(.slow_incdec); const abi_size: u32 = @intCast(dst_ty.abiSize(mod)); - const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; + const limb_len = math.divCeil(u32, abi_size, 8) catch unreachable; try self.spillRegisters(&.{ .rax, .rcx, .rdx }); const 
reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx }); @@ -6935,7 +7141,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { }, else => { const abi_size: u31 = @intCast(ty.abiSize(mod)); - const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable; + const limb_len = math.divCeil(u31, abi_size, 8) catch unreachable; const tmp_regs = try self.register_manager.allocRegs(3, .{null} ** 3, abi.RegisterClass.gp); @@ -8222,7 +8428,7 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .@"and" }, .cl, - Immediate.u(std.math.maxInt(u6)), + Immediate.u(math.maxInt(u6)), ); try self.asmRegisterImmediate( .{ ._r, .sh }, @@ -8259,7 +8465,7 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .@"and" }, .cl, - Immediate.u(std.math.maxInt(u6)), + Immediate.u(math.maxInt(u6)), ); try self.asmRegisterImmediate( .{ ._r, .sh }, @@ -8324,7 +8530,7 @@ fn genShiftBinOpMir( }, .sh }, temp_regs[2].to64(), temp_regs[3].to64(), - Immediate.u(shift_imm & std.math.maxInt(u6)), + Immediate.u(shift_imm & math.maxInt(u6)), ), else => try self.asmRegisterRegisterRegister(.{ switch (tag[0]) { ._l => ._ld, @@ -8379,7 +8585,7 @@ fn genShiftBinOpMir( .immediate => |shift_imm| try self.asmRegisterImmediate( tag, temp_regs[2].to64(), - Immediate.u(shift_imm & std.math.maxInt(u6)), + Immediate.u(shift_imm & math.maxInt(u6)), ), else => try self.asmRegisterRegister(tag, temp_regs[2].to64(), .cl), } @@ -8974,7 +9180,7 @@ fn genMulDivBinOp( switch (tag) { .mul, .mul_wrap => { const slow_inc = self.hasFeature(.slow_incdec); - const limb_len = std.math.divCeil(u32, src_abi_size, 8) catch unreachable; + const limb_len = math.divCeil(u32, src_abi_size, 8) catch unreachable; try self.spillRegisters(&.{ .rax, .rcx, .rdx }); const reg_locks = self.register_manager.lockRegs(3, .{ .rax, .rcx, .rdx }); @@ -14535,7 +14741,7 @@ fn genSetReg( ty, dst_reg.class(), self.getFrameAddrAlignment(frame_addr).compare(.gte, Alignment.fromLog2Units( - std.math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), + math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), )), ), .lea_frame => .{ .move = .{ ._, .lea } }, @@ -16833,6 +17039,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { @intCast(mask_elem_val.toSignedInt(mod)); } + const has_avx = self.hasFeature(.avx); const result = @as(?MCValue, result: { for (mask_elems) |mask_elem| { if (mask_elem) |_| break; @@ -16858,7 +17065,6 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { break :result dst_mcv; } - const has_avx = self.hasFeature(.avx); shufpd: { if (elem_abi_size != 8) break :shufpd; if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd; diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 29c80fce5f..5aa6d1d4a2 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -335,6 +335,8 @@ pub const Mnemonic = enum { pextrb, pextrd, pextrq, pinsrb, pinsrd, pinsrq, pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw, + pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq, + pmovzxbd, pmovzxbq, pmovzxbw, pmovzxdq, pmovzxwd, pmovzxwq, pmulld, roundpd, roundps, roundsd, roundss, // SSE4.2 @@ -387,6 +389,8 @@ pub const Mnemonic = enum { vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw, vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw, vpmovmskb, + vpmovsxbd, vpmovsxbq, vpmovsxbw, vpmovsxdq, vpmovsxwd, vpmovsxwq, + vpmovzxbd, vpmovzxbq, vpmovzxbw, vpmovzxdq, vpmovzxwd, vpmovzxwq, vpmulhw, vpmulld, vpmullw, vpor, vpshufb, vpshufd, 
vpshufhw, vpshuflw, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 6d08110322..5df5700004 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -658,6 +658,14 @@ pub const Inst = struct { /// Insert scalar single-precision floating-point value /// Insert packed floating-point values insert, + /// Packed move with sign extend + movsxb, + movsxd, + movsxw, + /// Packed move with zero extend + movzxb, + movzxd, + movzxw, /// Round packed single-precision floating-point values /// Round scalar single-precision floating-point value /// Round packed double-precision floating-point values diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index ac6d561ff3..99e0d96a44 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1235,6 +1235,20 @@ pub const table = [_]Entry{ .{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 }, + .{ .pmovsxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .none, .sse4_1 }, + .{ .pmovsxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .none, .sse4_1 }, + .{ .pmovsxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .none, .sse4_1 }, + .{ .pmovsxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .none, .sse4_1 }, + .{ .pmovsxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .none, .sse4_1 }, + .{ .pmovsxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .none, .sse4_1 }, + + .{ .pmovzxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .none, .sse4_1 }, + .{ .pmovzxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .none, .sse4_1 }, + .{ .pmovzxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .none, .sse4_1 }, + .{ .pmovzxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .none, .sse4_1 }, + .{ .pmovzxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .none, .sse4_1 }, + .{ .pmovzxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .none, .sse4_1 }, + .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 }, .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, @@ -1587,6 +1601,20 @@ pub const table = [_]Entry{ .{ .vpmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx }, .{ .vpmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .vex_128_wig, .avx }, + + .{ .vpmovzxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 
0x38, 0x34 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_128_wig, .avx }, + .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx }, .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx }, @@ -1816,6 +1844,20 @@ pub const table = [_]Entry{ .{ .vpmovmskb, .rm, &.{ .r32, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 }, .{ .vpmovmskb, .rm, &.{ .r64, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxbw, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxbd, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxbq, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxwd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxwq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxdq, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmovzxbw, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxbd, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxbq, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxwd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxwq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxdq, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 }, .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 }, diff --git a/src/codegen/c.zig b/src/codegen/c.zig index cf372ff5ef..ce745fbbe5 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -6109,41 +6109,48 @@ fn airFloatCast(f: *Function, inst: Air.Inst.Index) !CValue { const ty_op = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const inst_ty = f.typeOfIndex(inst); + const inst_scalar_ty = inst_ty.scalarType(mod); const operand = try f.resolveInst(ty_op.operand); try reap(f, inst, &.{ty_op.operand}); const operand_ty = f.typeOf(ty_op.operand); + const scalar_ty = operand_ty.scalarType(mod); const target = f.object.dg.module.getTarget(); - const operation = if (inst_ty.isRuntimeFloat() and operand_ty.isRuntimeFloat()) - if (inst_ty.floatBits(target) < operand_ty.floatBits(target)) "trunc" else "extend" - else if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) - if (inst_ty.isSignedInt(mod)) "fix" else "fixuns" - else if (inst_ty.isRuntimeFloat() and operand_ty.isInt(mod)) - if (operand_ty.isSignedInt(mod)) "float" else "floatun" + const operation = if (inst_scalar_ty.isRuntimeFloat() and scalar_ty.isRuntimeFloat()) + if (inst_scalar_ty.floatBits(target) < scalar_ty.floatBits(target)) "trunc" else "extend" + else if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) + if (inst_scalar_ty.isSignedInt(mod)) "fix" else "fixuns" + else if (inst_scalar_ty.isRuntimeFloat() and scalar_ty.isInt(mod)) + if (scalar_ty.isSignedInt(mod)) "float" else "floatun" else unreachable; const writer = f.object.writer(); const local = try f.allocLocal(inst, inst_ty); + const v = try 
Vectorize.start(f, inst, writer, operand_ty); + const a = try Assignment.start(f, writer, scalar_ty); try f.writeCValue(writer, local, .Other); - - try writer.writeAll(" = "); - if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) { + try v.elem(f, writer); + try a.assign(f, writer); + if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) { try writer.writeAll("zig_wrap_"); - try f.object.dg.renderTypeForBuiltinFnName(writer, inst_ty); + try f.object.dg.renderTypeForBuiltinFnName(writer, inst_scalar_ty); try writer.writeByte('('); } try writer.writeAll("zig_"); try writer.writeAll(operation); - try writer.writeAll(compilerRtAbbrev(operand_ty, mod)); - try writer.writeAll(compilerRtAbbrev(inst_ty, mod)); + try writer.writeAll(compilerRtAbbrev(scalar_ty, mod)); + try writer.writeAll(compilerRtAbbrev(inst_scalar_ty, mod)); try writer.writeByte('('); try f.writeCValue(writer, operand, .FunctionArgument); + try v.elem(f, writer); try writer.writeByte(')'); - if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) { - try f.object.dg.renderBuiltinInfo(writer, inst_ty, .bits); + if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) { + try f.object.dg.renderBuiltinInfo(writer, inst_scalar_ty, .bits); try writer.writeByte(')'); } - try writer.writeAll(";\n"); + try a.end(f, writer); + try v.end(f, inst, writer); + return local; } diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 5ea749d6d9..e434260052 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -8648,8 +8648,6 @@ pub const FuncGen = struct { const operand_ty = self.typeOf(ty_op.operand); const dest_ty = self.typeOfIndex(inst); const target = mod.getTarget(); - const dest_bits = dest_ty.floatBits(target); - const src_bits = operand_ty.floatBits(target); if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) { return self.wip.cast(.fpext, operand, try o.lowerType(dest_ty), ""); @@ -8657,11 +8655,19 @@ pub const FuncGen = struct { const operand_llvm_ty = try o.lowerType(operand_ty); const dest_llvm_ty = try o.lowerType(dest_ty); + const dest_bits = dest_ty.scalarType(mod).floatBits(target); + const src_bits = operand_ty.scalarType(mod).floatBits(target); const fn_name = try o.builder.fmt("__extend{s}f{s}f2", .{ compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits), }); const libc_fn = try self.getLibcFunction(fn_name, &.{operand_llvm_ty}, dest_llvm_ty); + if (dest_ty.isVector(mod)) return self.buildElementwiseCall( + libc_fn, + &.{operand}, + try o.builder.poisonValue(dest_llvm_ty), + dest_ty.vectorLen(mod), + ); return self.wip.call( .normal, .ccc, diff --git a/src/type.zig b/src/type.zig index f27e157c31..7e570e3bdf 100644 --- a/src/type.zig +++ b/src/type.zig @@ -2134,7 +2134,8 @@ pub const Type = struct { /// Returns true if and only if the type is a fixed-width integer. pub fn isInt(self: Type, mod: *const Module) bool { - return self.isSignedInt(mod) or self.isUnsignedInt(mod); + return self.toIntern() != .comptime_int_type and + mod.intern_pool.isIntegerType(self.toIntern()); } /// Returns true if and only if the type is a fixed-width, signed integer. 
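
Reviewer note: the `Sema.coerceArrayLike` hunk above turns runtime vector widening into an ordinary type coercion, lowered to a single `.intcast` or `.fpext` AIR instruction, which is what the behavior-test changes below rely on. A minimal sketch of the resulting user-visible semantics (not part of the patch, mirroring the tests in `test/behavior/cast.zig`):

```zig
const std = @import("std");

test "runtime vectors widen by plain coercion" {
    // Integer widening: same signedness and destination bits >= source bits
    // (or unsigned -> strictly wider signed), no @intCast required.
    var narrow: @Vector(2, u8) = .{ 0x55, 0xaa };
    _ = &narrow; // keep the operand runtime-known
    const wide: @Vector(2, u16) = narrow; // lowered to one .intcast
    try std.testing.expectEqual(@Vector(2, u16){ 0x55, 0xaa }, wide);

    // Float widening is lowered to one .fpext.
    var f: @Vector(2, f32) = .{ 1.5, 2.5 };
    _ = &f;
    const d: @Vector(2, f64) = f;
    try std.testing.expectEqual(@Vector(2, f64){ 1.5, 2.5 }, d);
}
```
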
diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index c591a5a619..19e5ebb3c1 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -601,25 +601,25 @@ test "cast *[1][*]const u8 to [*]const ?[*]const u8" { test "@intCast on vector" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const S = struct { fn doTheTest() !void { // Upcast (implicit, equivalent to @intCast) var up0: @Vector(2, u8) = [_]u8{ 0x55, 0xaa }; _ = &up0; - const up1 = @as(@Vector(2, u16), up0); - const up2 = @as(@Vector(2, u32), up0); - const up3 = @as(@Vector(2, u64), up0); + const up1: @Vector(2, u16) = up0; + const up2: @Vector(2, u32) = up0; + const up3: @Vector(2, u64) = up0; // Downcast (safety-checked) var down0 = up3; _ = &down0; - const down1 = @as(@Vector(2, u32), @intCast(down0)); - const down2 = @as(@Vector(2, u16), @intCast(down0)); - const down3 = @as(@Vector(2, u8), @intCast(down0)); + const down1: @Vector(2, u32) = @intCast(down0); + const down2: @Vector(2, u16) = @intCast(down0); + const down3: @Vector(2, u8) = @intCast(down0); try expect(mem.eql(u16, &@as([2]u16, up1), &[2]u16{ 0x55, 0xaa })); try expect(mem.eql(u32, &@as([2]u32, up2), &[2]u32{ 0x55, 0xaa })); @@ -629,20 +629,10 @@ test "@intCast on vector" { try expect(mem.eql(u16, &@as([2]u16, down2), &[2]u16{ 0x55, 0xaa })); try expect(mem.eql(u8, &@as([2]u8, down3), &[2]u8{ 0x55, 0xaa })); } - - fn doTheTestFloat() !void { - var vec: @Vector(2, f32) = @splat(1234.0); - _ = &vec; - const wider: @Vector(2, f64) = vec; - try expect(wider[0] == 1234.0); - try expect(wider[1] == 1234.0); - } }; try S.doTheTest(); try comptime S.doTheTest(); - try S.doTheTestFloat(); - try comptime S.doTheTestFloat(); } test "@floatCast cast down" { @@ -2340,10 +2330,31 @@ test "@floatCast on vector" { const S = struct { fn doTheTest() !void { - var a: @Vector(3, f64) = .{ 1.5, 2.5, 3.5 }; - _ = &a; - const b: @Vector(3, f32) = @floatCast(a); - try expectEqual(@Vector(3, f32){ 1.5, 2.5, 3.5 }, b); + { + var a: @Vector(2, f64) = .{ 1.5, 2.5 }; + _ = &a; + const b: @Vector(2, f32) = @floatCast(a); + try expectEqual(@Vector(2, f32){ 1.5, 2.5 }, b); + } + { + var a: @Vector(2, f32) = .{ 3.25, 4.25 }; + _ = &a; + const b: @Vector(2, f64) = @floatCast(a); + try expectEqual(@Vector(2, f64){ 3.25, 4.25 }, b); + } + { + var a: @Vector(2, f32) = .{ 5.75, 6.75 }; + _ = &a; + const b: @Vector(2, f64) = a; + try expectEqual(@Vector(2, f64){ 5.75, 6.75 }, b); + } + { + var vec: @Vector(2, f32) = @splat(1234.0); + _ = &vec; + const wider: @Vector(2, f64) = vec; + try expect(wider[0] == 1234.0); + try expect(wider[1] == 1234.0); + } } };
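
Appendix note on the `std.unicode` hunks: the ASCII fast paths no longer deinterleave bytes with `std.simd.deinterlace`/`interlace`; narrowing is an element-wise `@truncate` and widening is a plain vector cast, valid because an ASCII UTF-16LE code unit has a zero high byte. A simplified standalone sketch of the idea (fixed chunk length, little-endian byte handling omitted):

```zig
const std = @import("std");

test "ASCII fast path: vector truncate and widen" {
    const chunk_len = 4;
    const utf16: @Vector(chunk_len, u16) = .{ 'a', 'b', 'c', 'd' };

    // Same non-ASCII probe as the patch: x | 0x7F == 0x7F iff x <= 0x7F.
    const mask: @Vector(chunk_len, u16) = @splat(0x7F);
    try std.testing.expect(!@reduce(.Or, utf16 | mask != mask));

    // UTF-16 -> UTF-8 direction: element-wise truncate keeps the low byte.
    const ascii: @Vector(chunk_len, u8) = @truncate(utf16);
    try std.testing.expectEqual([chunk_len]u8{ 'a', 'b', 'c', 'd' }, @as([chunk_len]u8, ascii));

    // UTF-8 -> UTF-16 direction: widening is a plain vector cast.
    const widened: @Vector(chunk_len, u16) = ascii;
    try std.testing.expectEqual(utf16, widened);
}
```
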