diff --git a/lib/std/Thread/Condition.zig b/lib/std/Thread/Condition.zig index ecbc25fdb0..898fc14520 100644 --- a/lib/std/Thread/Condition.zig +++ b/lib/std/Thread/Condition.zig @@ -324,8 +324,6 @@ test "Condition - wait and signal" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const num_threads = 4; const MultiWait = struct { @@ -371,8 +369,6 @@ test "Condition - signal" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const num_threads = 4; const SignalTest = struct { @@ -440,8 +436,6 @@ test "Condition - multi signal" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const num_threads = 4; const num_iterations = 4; @@ -504,8 +498,6 @@ test "Condition - broadcasting" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const num_threads = 10; const BroadcastTest = struct { @@ -573,8 +565,6 @@ test "Condition - broadcasting - wake all threads" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var num_runs: usize = 1; const num_threads = 10; diff --git a/lib/std/Thread/Mutex.zig b/lib/std/Thread/Mutex.zig index c6416113b5..0f618516b5 100644 --- a/lib/std/Thread/Mutex.zig +++ b/lib/std/Thread/Mutex.zig @@ -289,8 +289,6 @@ test "Mutex - many contended" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const num_threads = 4; const num_increments = 1000; diff --git a/lib/std/Thread/RwLock.zig b/lib/std/Thread/RwLock.zig index 2cd101f913..e77db10abb 100644 --- a/lib/std/Thread/RwLock.zig +++ b/lib/std/Thread/RwLock.zig @@ -297,8 +297,6 @@ test "RwLock - concurrent access" { if (builtin.single_threaded) return; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const num_writers: usize = 2; const num_readers: usize = 4; const num_writes: usize = 10000; diff --git a/lib/std/Thread/Semaphore.zig b/lib/std/Thread/Semaphore.zig index 0c04e8a859..1b182d4c2a 100644 --- a/lib/std/Thread/Semaphore.zig +++ b/lib/std/Thread/Semaphore.zig @@ -39,8 +39,6 @@ test "Thread.Semaphore" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const TestContext = struct { sem: *Semaphore, n: *i32, diff --git a/lib/std/atomic/Atomic.zig b/lib/std/atomic/Atomic.zig index 2abe7fc4ca..e38ada0c20 100644 --- a/lib/std/atomic/Atomic.zig +++ b/lib/std/atomic/Atomic.zig @@ -467,8 +467,6 @@ test "Atomic.fetchSub" { } test "Atomic.fetchMin" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - inline for (atomicIntTypes()) |Int| { inline for (atomic_rmw_orderings) |ordering| { var x = Atomic(Int).init(5); diff --git a/lib/std/atomic/queue.zig b/lib/std/atomic/queue.zig index c28daead1b..e8d37507d3 100644 --- a/lib/std/atomic/queue.zig +++ b/lib/std/atomic/queue.zig @@ -175,8 +175,6 @@ const puts_per_thread = 500; const put_thread_count = 3; test "std.atomic.Queue" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var plenty_of_memory = try std.heap.page_allocator.alloc(u8, 300 * 1024); defer std.heap.page_allocator.free(plenty_of_memory); diff --git a/lib/std/base64.zig b/lib/std/base64.zig index bca829de38..8d0effd05b 100644 --- a/lib/std/base64.zig +++ b/lib/std/base64.zig @@ -355,8 +355,6 @@ pub const Base64DecoderWithIgnore = struct { }; test "base64" { - if (builtin.zig_backend == 
.stage2_x86_64) return error.SkipZigTest; - @setEvalBranchQuota(8000); try testBase64(); try comptime testAllApis(standard, "comptime", "Y29tcHRpbWU="); @@ -377,8 +375,6 @@ test "base64 padding dest overflow" { } test "base64 url_safe_no_pad" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - @setEvalBranchQuota(8000); try testBase64UrlSafeNoPad(); try comptime testAllApis(url_safe_no_pad, "comptime", "Y29tcHRpbWU"); diff --git a/lib/std/bit_set.zig b/lib/std/bit_set.zig index d87c1d26f6..ee2a89cb68 100644 --- a/lib/std/bit_set.zig +++ b/lib/std/bit_set.zig @@ -1638,7 +1638,6 @@ fn testStaticBitSet(comptime Set: type) !void { test "IntegerBitSet" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; try testStaticBitSet(IntegerBitSet(0)); try testStaticBitSet(IntegerBitSet(1)); @@ -1651,8 +1650,6 @@ test "IntegerBitSet" { } test "ArrayBitSet" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - inline for (.{ 0, 1, 2, 31, 32, 33, 63, 64, 65, 254, 500, 3000 }) |size| { try testStaticBitSet(ArrayBitSet(u8, size)); try testStaticBitSet(ArrayBitSet(u16, size)); diff --git a/lib/std/compress/zstandard.zig b/lib/std/compress/zstandard.zig index 10eb878a1c..401496ca28 100644 --- a/lib/std/compress/zstandard.zig +++ b/lib/std/compress/zstandard.zig @@ -264,8 +264,6 @@ fn testReader(data: []const u8, comptime expected: []const u8) !void { } test "zstandard decompression" { - if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest; - const uncompressed = @embedFile("testdata/rfc8478.txt"); const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3"); const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19"); diff --git a/lib/std/crypto/25519/ed25519.zig b/lib/std/crypto/25519/ed25519.zig index faeab59a0b..c00ce4387e 100644 --- a/lib/std/crypto/25519/ed25519.zig +++ b/lib/std/crypto/25519/ed25519.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const builtin = @import("builtin"); const crypto = std.crypto; const debug = std.debug; const fmt = std.fmt; @@ -276,8 +275,8 @@ pub const Ed25519 = struct { pub fn fromSecretKey(secret_key: SecretKey) (NonCanonicalError || EncodingError || IdentityElementError)!KeyPair { // It is critical for EdDSA to use the correct public key. // In order to enforce this, a SecretKey implicitly includes a copy of the public key. - // In Debug mode, we can still afford checking that the public key is correct for extra safety. - if (builtin.mode == .Debug) { + // With runtime safety, we can still afford checking that the public key is correct. 
+ if (std.debug.runtime_safety) { const pk_p = try Curve.fromBytes(secret_key.publicKeyBytes()); const recomputed_kp = try create(secret_key.seed()); debug.assert(mem.eql(u8, &recomputed_kp.public_key.toBytes(), &pk_p.toBytes())); @@ -493,8 +492,6 @@ test "ed25519 key pair creation" { } test "ed25519 signature" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var seed: [32]u8 = undefined; _ = try fmt.hexToBytes(seed[0..], "8052030376d47112be7f73ed7a019293dd12ad910b654455798b4667d73de166"); const key_pair = try Ed25519.KeyPair.create(seed); @@ -507,8 +504,6 @@ test "ed25519 signature" { } test "ed25519 batch verification" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var i: usize = 0; while (i < 100) : (i += 1) { const key_pair = try Ed25519.KeyPair.create(null); @@ -538,8 +533,6 @@ test "ed25519 batch verification" { } test "ed25519 test vectors" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const Vec = struct { msg_hex: *const [64:0]u8, public_key_hex: *const [64:0]u8, @@ -642,8 +635,6 @@ test "ed25519 test vectors" { } test "ed25519 with blind keys" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const BlindKeyPair = Ed25519.key_blinding.BlindKeyPair; // Create a standard Ed25519 key pair @@ -667,8 +658,6 @@ test "ed25519 with blind keys" { } test "ed25519 signatures with streaming" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const kp = try Ed25519.KeyPair.create(null); var signer = try kp.signer(null); diff --git a/lib/std/crypto/Certificate.zig b/lib/std/crypto/Certificate.zig index b7d24de0d6..9303e3c522 100644 --- a/lib/std/crypto/Certificate.zig +++ b/lib/std/crypto/Certificate.zig @@ -614,18 +614,18 @@ const Date = struct { }; pub fn parseTimeDigits(text: *const [2]u8, min: u8, max: u8) !u8 { - const nn: @Vector(2, u16) = .{ text[0], text[1] }; - const zero: @Vector(2, u16) = .{ '0', '0' }; - const mm: @Vector(2, u16) = .{ 10, 1 }; - const result = @reduce(.Add, (nn -% zero) *% mm); + const result = if (use_vectors) result: { + const nn: @Vector(2, u16) = .{ text[0], text[1] }; + const zero: @Vector(2, u16) = .{ '0', '0' }; + const mm: @Vector(2, u16) = .{ 10, 1 }; + break :result @reduce(.Add, (nn -% zero) *% mm); + } else std.fmt.parseInt(u8, text, 10) catch return error.CertificateTimeInvalid; if (result < min) return error.CertificateTimeInvalid; if (result > max) return error.CertificateTimeInvalid; return @truncate(result); } test parseTimeDigits { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const expectEqual = std.testing.expectEqual; try expectEqual(@as(u8, 0), try parseTimeDigits("00", 0, 99)); try expectEqual(@as(u8, 99), try parseTimeDigits("99", 0, 99)); @@ -638,17 +638,17 @@ test parseTimeDigits { } pub fn parseYear4(text: *const [4]u8) !u16 { - const nnnn: @Vector(4, u32) = .{ text[0], text[1], text[2], text[3] }; - const zero: @Vector(4, u32) = .{ '0', '0', '0', '0' }; - const mmmm: @Vector(4, u32) = .{ 1000, 100, 10, 1 }; - const result = @reduce(.Add, (nnnn -% zero) *% mmmm); + const result = if (use_vectors) result: { + const nnnn: @Vector(4, u32) = .{ text[0], text[1], text[2], text[3] }; + const zero: @Vector(4, u32) = .{ '0', '0', '0', '0' }; + const mmmm: @Vector(4, u32) = .{ 1000, 100, 10, 1 }; + break :result @reduce(.Add, (nnnn -% zero) *% mmmm); + } else std.fmt.parseInt(u16, text, 10) catch return error.CertificateTimeInvalid; if (result > 9999) return error.CertificateTimeInvalid; 
return @truncate(result); } test parseYear4 { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const expectEqual = std.testing.expectEqual; try expectEqual(@as(u16, 0), try parseYear4("0000")); try expectEqual(@as(u16, 9999), try parseYear4("9999")); @@ -1124,4 +1124,4 @@ pub const rsa = struct { } }; -const builtin = @import("builtin"); +const use_vectors = @import("builtin").zig_backend != .stage2_x86_64; diff --git a/lib/std/crypto/Certificate/Bundle.zig b/lib/std/crypto/Certificate/Bundle.zig index 1bffa325bd..b5a3832115 100644 --- a/lib/std/crypto/Certificate/Bundle.zig +++ b/lib/std/crypto/Certificate/Bundle.zig @@ -318,8 +318,6 @@ const MapContext = struct { test "scan for OS-provided certificates" { if (builtin.os.tag == .wasi) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var bundle: Bundle = .{}; defer bundle.deinit(std.testing.allocator); diff --git a/lib/std/crypto/aes.zig b/lib/std/crypto/aes.zig index c4f709d631..d043099f50 100644 --- a/lib/std/crypto/aes.zig +++ b/lib/std/crypto/aes.zig @@ -28,8 +28,6 @@ pub const Aes128 = impl.Aes128; pub const Aes256 = impl.Aes256; test "ctr" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - // NIST SP 800-38A pp 55-58 const ctr = @import("modes.zig").ctr; diff --git a/lib/std/crypto/aes_gcm.zig b/lib/std/crypto/aes_gcm.zig index e276c9dfd8..980a234c54 100644 --- a/lib/std/crypto/aes_gcm.zig +++ b/lib/std/crypto/aes_gcm.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const builtin = @import("builtin"); const assert = std.debug.assert; const crypto = std.crypto; const debug = std.debug; @@ -42,7 +41,7 @@ fn AesGcm(comptime Aes: anytype) type { mac.pad(); mem.writeInt(u32, j[nonce_length..][0..4], 2, .big); - modes.ctr(@TypeOf(aes), aes, c, m, j, std.builtin.Endian.big); + modes.ctr(@TypeOf(aes), aes, c, m, j, .big); mac.update(c[0..m.len][0..]); mac.pad(); @@ -104,7 +103,7 @@ fn AesGcm(comptime Aes: anytype) type { } mem.writeInt(u32, j[nonce_length..][0..4], 2, .big); - modes.ctr(@TypeOf(aes), aes, m, c, j, std.builtin.Endian.big); + modes.ctr(@TypeOf(aes), aes, m, c, j, .big); } }; } @@ -113,8 +112,6 @@ const htest = @import("test.zig"); const testing = std.testing; test "Aes256Gcm - Empty message and no associated data" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length; const nonce: [Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length; const ad = ""; @@ -127,8 +124,6 @@ test "Aes256Gcm - Empty message and no associated data" { } test "Aes256Gcm - Associated data only" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length; const nonce: [Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length; const m = ""; @@ -141,8 +136,6 @@ test "Aes256Gcm - Associated data only" { } test "Aes256Gcm - Message only" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length; const nonce: [Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length; const m = "Test with message only"; @@ -160,8 +153,6 @@ test "Aes256Gcm - Message only" { } test "Aes256Gcm - Message and associated data" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length; const nonce: 
[Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length; const m = "Test with message"; diff --git a/lib/std/crypto/argon2.zig b/lib/std/crypto/argon2.zig index 500bebd09c..4e6d391799 100644 --- a/lib/std/crypto/argon2.zig +++ b/lib/std/crypto/argon2.zig @@ -896,8 +896,6 @@ test "kdf" { } test "phc format hasher" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const allocator = std.testing.allocator; const password = "testpass"; @@ -913,8 +911,6 @@ test "phc format hasher" { } test "password hash and password verify" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const allocator = std.testing.allocator; const password = "testpass"; diff --git a/lib/std/crypto/bcrypt.zig b/lib/std/crypto/bcrypt.zig index 9fc2ecb63b..720f264d16 100644 --- a/lib/std/crypto/bcrypt.zig +++ b/lib/std/crypto/bcrypt.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const builtin = @import("builtin"); const base64 = std.base64; const crypto = std.crypto; const debug = std.debug; @@ -754,8 +753,6 @@ pub fn strVerify( } test "bcrypt codec" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var salt: [salt_length]u8 = undefined; crypto.random.bytes(&salt); var salt_str: [salt_str_length]u8 = undefined; @@ -766,8 +763,6 @@ test "bcrypt codec" { } test "bcrypt crypt format" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var hash_options = HashOptions{ .params = .{ .rounds_log = 5 }, .encoding = .crypt, @@ -808,8 +803,6 @@ test "bcrypt crypt format" { } test "bcrypt phc format" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var hash_options = HashOptions{ .params = .{ .rounds_log = 5 }, .encoding = .phc, diff --git a/lib/std/crypto/cmac.zig b/lib/std/crypto/cmac.zig index 32ba7f4b99..902bac591c 100644 --- a/lib/std/crypto/cmac.zig +++ b/lib/std/crypto/cmac.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const builtin = @import("builtin"); const crypto = std.crypto; const mem = std.mem; @@ -94,8 +93,6 @@ pub fn Cmac(comptime BlockCipher: type) type { const testing = std.testing; test "CmacAes128 - Example 1: len = 0" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key = [_]u8{ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, }; @@ -109,8 +106,6 @@ test "CmacAes128 - Example 1: len = 0" { } test "CmacAes128 - Example 2: len = 16" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key = [_]u8{ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, }; @@ -126,8 +121,6 @@ test "CmacAes128 - Example 2: len = 16" { } test "CmacAes128 - Example 3: len = 40" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key = [_]u8{ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, }; @@ -145,8 +138,6 @@ test "CmacAes128 - Example 3: len = 40" { } test "CmacAes128 - Example 4: len = 64" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key = [_]u8{ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, }; diff --git a/lib/std/crypto/ecdsa.zig b/lib/std/crypto/ecdsa.zig index e705fcf79b..6f8a32ea21 100644 --- a/lib/std/crypto/ecdsa.zig +++ b/lib/std/crypto/ecdsa.zig @@ -373,7 +373,6 @@ pub fn Ecdsa(comptime Curve: type, comptime Hash: type) type { test "ECDSA - Basic operations over EcdsaP384Sha384" { if 
(builtin.zig_backend == .stage2_c) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const Scheme = EcdsaP384Sha384; const kp = try Scheme.KeyPair.create(null); @@ -407,7 +406,6 @@ test "ECDSA - Basic operations over Secp256k1" { test "ECDSA - Basic operations over EcdsaP384Sha256" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const Scheme = Ecdsa(crypto.ecc.P384, crypto.hash.sha2.Sha256); const kp = try Scheme.KeyPair.create(null); @@ -424,7 +422,6 @@ test "ECDSA - Basic operations over EcdsaP384Sha256" { test "ECDSA - Verifying a existing signature with EcdsaP384Sha256" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const Scheme = Ecdsa(crypto.ecc.P384, crypto.hash.sha2.Sha256); // zig fmt: off @@ -469,7 +466,6 @@ const TestVector = struct { test "ECDSA - Test vectors from Project Wycheproof" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const vectors = [_]TestVector{ .{ .key = "042927b10512bae3eddcfe467828128bad2903269919f7086069c8c4df6c732838c7787964eaac00e5921fb1498a60f4606766b3d9685001558d1a974e7341513e", .msg = "313233343030", .sig = "304402202ba3a8be6b94d5ec80a6d9d1190a436effe50d85a1eee859b8cc6af9bd5c2e1802204cd60b855d442f5b3c7b11eb6c4e0ae7525fe710fab9aa7c77a67f79e6fadd76", .result = .valid }, @@ -884,7 +880,6 @@ fn tvTry(vector: TestVector) !void { test "ECDSA - Sec1 encoding/decoding" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const Scheme = EcdsaP384Sha384; const kp = try Scheme.KeyPair.create(null); diff --git a/lib/std/crypto/ghash_polyval.zig b/lib/std/crypto/ghash_polyval.zig index 6949553ad5..6ccbf1f228 100644 --- a/lib/std/crypto/ghash_polyval.zig +++ b/lib/std/crypto/ghash_polyval.zig @@ -422,8 +422,6 @@ fn Hash(comptime endian: std.builtin.Endian, comptime shift_key: bool) type { const htest = @import("test.zig"); test "ghash" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const key = [_]u8{0x42} ** 16; const m = [_]u8{0x69} ** 256; @@ -441,8 +439,6 @@ test "ghash" { } test "ghash2" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var key: [16]u8 = undefined; var i: usize = 0; while (i < key.len) : (i += 1) { diff --git a/lib/std/crypto/pcurves/p256.zig b/lib/std/crypto/pcurves/p256.zig index 37f16715f4..ec176f78c5 100644 --- a/lib/std/crypto/pcurves/p256.zig +++ b/lib/std/crypto/pcurves/p256.zig @@ -478,7 +478,5 @@ pub const AffineCoordinates = struct { }; test { - if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest; - _ = @import("tests/p256.zig"); } diff --git a/lib/std/crypto/phc_encoding.zig b/lib/std/crypto/phc_encoding.zig index 6ac8f66bb4..fecd7f1239 100644 --- a/lib/std/crypto/phc_encoding.zig +++ b/lib/std/crypto/phc_encoding.zig @@ -1,7 +1,6 @@ // https://github.com/P-H-C/phc-string-format const std = @import("std"); -const builtin = @import("builtin"); const fmt = std.fmt; const io = std.io; const mem = std.mem; @@ -264,8 +263,6 @@ fn kvSplit(str: []const u8) !struct { key: []const u8, value: []const u8 } { } test "phc format - encoding/decoding" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const Input = struct { str: []const u8, HashResult: 
type, diff --git a/lib/std/crypto/sha2.zig b/lib/std/crypto/sha2.zig index 31884c7381..10909cfaec 100644 --- a/lib/std/crypto/sha2.zig +++ b/lib/std/crypto/sha2.zig @@ -238,7 +238,7 @@ fn Sha2x32(comptime params: Sha2Params32) type { return; }, // C backend doesn't currently support passing vectors to inline asm. - .x86_64 => if (builtin.zig_backend != .stage2_c and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) { + .x86_64 => if (builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) { var x: v4u32 = [_]u32{ d.s[5], d.s[4], d.s[1], d.s[0] }; var y: v4u32 = [_]u32{ d.s[7], d.s[6], d.s[3], d.s[2] }; const s_v = @as(*[16]v4u32, @ptrCast(&s)); diff --git a/lib/std/fmt/parse_float.zig b/lib/std/fmt/parse_float.zig index 00d19ea1c9..cdd11a6c59 100644 --- a/lib/std/fmt/parse_float.zig +++ b/lib/std/fmt/parse_float.zig @@ -83,8 +83,6 @@ test "fmt.parseFloat #11169" { } test "fmt.parseFloat hex.special" { - if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testing.expect(math.isNan(try parseFloat(f32, "nAn"))); try testing.expect(math.isPositiveInf(try parseFloat(f32, "iNf"))); try testing.expect(math.isPositiveInf(try parseFloat(f32, "+Inf"))); diff --git a/lib/std/hash/xxhash.zig b/lib/std/hash/xxhash.zig index 2e4c11e333..eef9c9654a 100644 --- a/lib/std/hash/xxhash.zig +++ b/lib/std/hash/xxhash.zig @@ -2,6 +2,7 @@ const std = @import("std"); const builtin = @import("builtin"); const mem = std.mem; const expectEqual = std.testing.expectEqual; +const native_endian = builtin.cpu.arch.endian(); const rotl = std.math.rotl; @@ -472,7 +473,7 @@ pub const XxHash3 = struct { } inline fn swap(x: anytype) @TypeOf(x) { - return if (builtin.cpu.arch.endian() == .big) @byteSwap(x) else x; + return if (native_endian == .big) @byteSwap(x) else x; } inline fn disableAutoVectorization(x: anytype) void { diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig index e8e0363c2e..eb9896d40a 100644 --- a/lib/std/http/Client.zig +++ b/lib/std/http/Client.zig @@ -9,6 +9,7 @@ const net = std.net; const Uri = std.Uri; const Allocator = mem.Allocator; const assert = std.debug.assert; +const use_vectors = builtin.zig_backend != .stage2_x86_64; const Client = @This(); const proto = @import("protocol.zig"); @@ -408,7 +409,7 @@ pub const Response = struct { else => return error.HttpHeadersInvalid, }; if (first_line[8] != ' ') return error.HttpHeadersInvalid; - const status = @as(http.Status, @enumFromInt(parseInt3(first_line[9..12].*))); + const status: http.Status = @enumFromInt(parseInt3(first_line[9..12])); const reason = mem.trimLeft(u8, first_line[12..], " "); res.version = version; @@ -481,20 +482,24 @@ pub const Response = struct { } inline fn int64(array: *const [8]u8) u64 { - return @as(u64, @bitCast(array.*)); + return @bitCast(array.*); } - fn parseInt3(nnn: @Vector(3, u8)) u10 { - const zero: @Vector(3, u8) = .{ '0', '0', '0' }; - const mmm: @Vector(3, u10) = .{ 100, 10, 1 }; - return @reduce(.Add, @as(@Vector(3, u10), nnn -% zero) *% mmm); + fn parseInt3(text: *const [3]u8) u10 { + if (use_vectors) { + const nnn: @Vector(3, u8) = text.*; + const zero: @Vector(3, u8) = .{ '0', '0', '0' }; + const mmm: @Vector(3, u10) = .{ 100, 10, 1 }; + return @reduce(.Add, @as(@Vector(3, u10), nnn -% zero) *% mmm); + } + return std.fmt.parseInt(u10, text, 10) catch unreachable; } test parseInt3 { const expectEqual = testing.expectEqual; - try 
expectEqual(@as(u10, 0), parseInt3("000".*)); - try expectEqual(@as(u10, 418), parseInt3("418".*)); - try expectEqual(@as(u10, 999), parseInt3("999".*)); + try expectEqual(@as(u10, 0), parseInt3("000")); + try expectEqual(@as(u10, 418), parseInt3("418")); + try expectEqual(@as(u10, 999), parseInt3("999")); } version: http.Version, @@ -1588,7 +1593,8 @@ test { if (builtin.os.tag == .wasi) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .avx)) return error.SkipZigTest; std.testing.refAllDecls(@This()); } diff --git a/lib/std/http/Server.zig b/lib/std/http/Server.zig index 055d16eb8a..6928606b1b 100644 --- a/lib/std/http/Server.zig +++ b/lib/std/http/Server.zig @@ -736,8 +736,6 @@ test "HTTP server handles a chunked transfer coding request" { return error.SkipZigTest; } - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const native_endian = comptime builtin.cpu.arch.endian(); if (builtin.zig_backend == .stage2_llvm and native_endian == .big) { // https://github.com/ziglang/zig/issues/13782 diff --git a/lib/std/http/protocol.zig b/lib/std/http/protocol.zig index 8e458ed09c..4fe9c80380 100644 --- a/lib/std/http/protocol.zig +++ b/lib/std/http/protocol.zig @@ -1,8 +1,10 @@ const std = @import("../std.zig"); +const builtin = @import("builtin"); const testing = std.testing; const mem = std.mem; const assert = std.debug.assert; +const use_vectors = builtin.zig_backend != .stage2_x86_64; pub const State = enum { /// Begin header parsing states. @@ -83,7 +85,7 @@ pub const HeadersParser = struct { /// first byte of content is located at `bytes[result]`. pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 { const vector_len: comptime_int = @max(std.simd.suggestVectorSize(u8) orelse 1, 8); - const len = @as(u32, @intCast(bytes.len)); + const len: u32 = @intCast(bytes.len); var index: u32 = 0; while (true) { @@ -175,18 +177,27 @@ pub const HeadersParser = struct { continue; }, else => { - const Vector = @Vector(vector_len, u8); - // const BoolVector = @Vector(vector_len, bool); - const BitVector = @Vector(vector_len, u1); - const SizeVector = @Vector(vector_len, u8); - const chunk = bytes[index..][0..vector_len]; - const v: Vector = chunk.*; - const matches_r = @as(BitVector, @bitCast(v == @as(Vector, @splat('\r')))); - const matches_n = @as(BitVector, @bitCast(v == @as(Vector, @splat('\n')))); - const matches_or: SizeVector = matches_r | matches_n; + const matches = if (use_vectors) matches: { + const Vector = @Vector(vector_len, u8); + // const BoolVector = @Vector(vector_len, bool); + const BitVector = @Vector(vector_len, u1); + const SizeVector = @Vector(vector_len, u8); - const matches = @reduce(.Add, matches_or); + const v: Vector = chunk.*; + const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r'))); + const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n'))); + const matches_or: SizeVector = matches_r | matches_n; + + break :matches @reduce(.Add, matches_or); + } else matches: { + var matches: u8 = 0; + for (chunk) |byte| switch (byte) { + '\r', '\n' => matches += 1, + else => {}, + }; + break :matches matches; + }; switch (matches) { 0 => {}, 1 => switch (chunk[vector_len - 1]) { diff --git a/lib/std/math.zig b/lib/std/math.zig index f110efa0af..b18d5bc3ed 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -492,8 +492,6 @@ pub fn shl(comptime T: type, a: T, shift_amt: 
anytype) T { } test "shl" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) { // https://github.com/ziglang/zig/issues/12012 return error.SkipZigTest; @@ -539,8 +537,6 @@ pub fn shr(comptime T: type, a: T, shift_amt: anytype) T { } test "shr" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) { // https://github.com/ziglang/zig/issues/12012 return error.SkipZigTest; @@ -587,8 +583,6 @@ pub fn rotr(comptime T: type, x: T, r: anytype) T { } test "rotr" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) { // https://github.com/ziglang/zig/issues/12012 return error.SkipZigTest; @@ -634,8 +628,6 @@ pub fn rotl(comptime T: type, x: T, r: anytype) T { } test "rotl" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) { // https://github.com/ziglang/zig/issues/12012 return error.SkipZigTest; @@ -764,8 +756,6 @@ pub fn divTrunc(comptime T: type, numerator: T, denominator: T) !T { } test "divTrunc" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testDivTrunc(); try comptime testDivTrunc(); } @@ -790,8 +780,6 @@ pub fn divFloor(comptime T: type, numerator: T, denominator: T) !T { } test "divFloor" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testDivFloor(); try comptime testDivFloor(); } @@ -829,8 +817,6 @@ pub fn divCeil(comptime T: type, numerator: T, denominator: T) !T { } test "divCeil" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testDivCeil(); try comptime testDivCeil(); } @@ -875,8 +861,6 @@ pub fn divExact(comptime T: type, numerator: T, denominator: T) !T { } test "divExact" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testDivExact(); try comptime testDivExact(); } @@ -903,8 +887,6 @@ pub fn mod(comptime T: type, numerator: T, denominator: T) !T { } test "mod" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testMod(); try comptime testMod(); } @@ -931,8 +913,6 @@ pub fn rem(comptime T: type, numerator: T, denominator: T) !T { } test "rem" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testRem(); try comptime testRem(); } @@ -1285,7 +1265,8 @@ pub fn lerp(a: anytype, b: anytype, t: anytype) @TypeOf(a, b, t) { } test "lerp" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest; try testing.expectEqual(@as(f64, 75), lerp(50, 100, 0.5)); try testing.expectEqual(@as(f32, 43.75), lerp(50, 25, 0.25)); diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 4051a8cd31..ce73c0c648 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1318,7 +1318,7 @@ pub const Mutable = struct { /// /// `limbs_buffer` is used for temporary storage. /// The amount required is given by `calcPowLimbsBufferLen`. 
- pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) !void { + pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void { assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing // Handle all the trivial cases first @@ -3213,7 +3213,7 @@ pub const Managed = struct { var m = try Managed.initCapacity(rma.allocator, needed_limbs); errdefer m.deinit(); var m_mut = m.toMutable(); - try m_mut.pow(a.toConst(), b, limbs_buffer); + m_mut.pow(a.toConst(), b, limbs_buffer); m.setMetadata(m_mut.positive, m_mut.len); rma.deinit(); @@ -3221,7 +3221,7 @@ pub const Managed = struct { } else { try rma.ensureCapacity(needed_limbs); var rma_mut = rma.toMutable(); - try rma_mut.pow(a.toConst(), b, limbs_buffer); + rma_mut.pow(a.toConst(), b, limbs_buffer); rma.setMetadata(rma_mut.positive, rma_mut.len); } } diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 4a3bf10fcd..da8fb98c5c 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -2568,8 +2568,6 @@ test "big.int const to managed" { } test "big.int pow" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - { var a = try Managed.initSet(testing.allocator, -3); defer a.deinit(); @@ -2763,8 +2761,6 @@ fn popCountTest(val: *const Managed, bit_count: usize, expected: usize) !void { } test "big int conversion read/write twos complement" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var a = try Managed.initSet(testing.allocator, (1 << 493) - 1); defer a.deinit(); var b = try Managed.initSet(testing.allocator, (1 << 493) - 1); @@ -2863,8 +2859,6 @@ test "big int write twos complement +/- zero" { } test "big int conversion write twos complement with padding" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var a = try Managed.initSet(testing.allocator, 0x01_ffffffff_ffffffff_ffffffff); defer a.deinit(); diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 420e461ae5..73fe2e7757 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -315,8 +315,6 @@ pub fn zeroes(comptime T: type) T { } test "zeroes" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const C_struct = extern struct { x: u32, y: u32 align(128), @@ -4342,8 +4340,6 @@ pub fn alignInSlice(slice: anytype, comptime new_alignment: usize) ?AlignedSlice } test "read/write(Var)PackedInt" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - switch (builtin.cpu.arch) { // This test generates too much code to execute on WASI. // LLVM backend fails with "too many locals: locals exceed maximum" diff --git a/lib/std/net/test.zig b/lib/std/net/test.zig index fd92c64f35..0fe53a7b9f 100644 --- a/lib/std/net/test.zig +++ b/lib/std/net/test.zig @@ -60,7 +60,7 @@ test "parse and render IPv6 addresses" { } test "invalid but parseable IPv6 scope ids" { - if (builtin.os.tag != .linux or comptime !builtin.os.tag.isDarwin()) { + if (builtin.os.tag != .linux and comptime !builtin.os.tag.isDarwin()) { // Currently, resolveIp6 with alphanumerical scope IDs only works on Linux. // TODO Make this test pass on other operating systems. 
return error.SkipZigTest; diff --git a/lib/std/once.zig b/lib/std/once.zig index 87f81ee59c..f012e017dd 100644 --- a/lib/std/once.zig +++ b/lib/std/once.zig @@ -46,8 +46,6 @@ fn incr() void { } test "Once executes its function just once" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - if (builtin.single_threaded) { global_once.call(); global_once.call(); diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig index b7c1b7b675..f4a67f1035 100644 --- a/lib/std/os/test.zig +++ b/lib/std/os/test.zig @@ -375,8 +375,6 @@ fn testThreadIdFn(thread_id: *Thread.Id) void { test "std.Thread.getCurrentId" { if (builtin.single_threaded) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var thread_current_id: Thread.Id = undefined; const thread = try Thread.spawn(.{}, testThreadIdFn, .{&thread_current_id}); thread.join(); @@ -420,8 +418,6 @@ test "cpu count" { test "thread local storage" { if (builtin.single_threaded) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const thread1 = try Thread.spawn(.{}, testTls, .{}); const thread2 = try Thread.spawn(.{}, testTls, .{}); try testTls(); diff --git a/lib/std/rand/test.zig b/lib/std/rand/test.zig index e8a1d4de8a..d02c016357 100644 --- a/lib/std/rand/test.zig +++ b/lib/std/rand/test.zig @@ -1,5 +1,4 @@ const std = @import("../std.zig"); -const builtin = @import("builtin"); const math = std.math; const DefaultPrng = std.rand.DefaultPrng; const Random = std.rand.Random; @@ -200,8 +199,6 @@ fn testRandomIntLessThan() !void { } test "Random intAtMost" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - @setEvalBranchQuota(10000); try testRandomIntAtMost(); try comptime testRandomIntAtMost(); @@ -242,8 +239,6 @@ fn testRandomIntAtMost() !void { } test "Random Biased" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - var prng = DefaultPrng.init(0); const random = prng.random(); // Not thoroughly checking the logic here. 
@@ -452,8 +447,6 @@ test "CSPRNG" { } test "Random weightedIndex" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - // Make sure weightedIndex works for various integers and floats inline for (.{ u64, i4, f32, f64 }) |T| { var prng = DefaultPrng.init(0); diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index 59dcf29e16..72f65afb3a 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -1,5 +1,4 @@ const std = @import("../std.zig"); -const builtin = @import("builtin"); pub const Token = struct { tag: Tag, @@ -1450,8 +1449,6 @@ test "chars" { } test "invalid token characters" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testTokenize("#", &.{.invalid}); try testTokenize("`", &.{.invalid}); try testTokenize("'c", &.{.invalid}); @@ -1571,8 +1568,6 @@ test "pipe and then invalid" { } test "line comment and doc comment" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testTokenize("//", &.{}); try testTokenize("// a / b", &.{}); try testTokenize("// /", &.{}); @@ -1647,8 +1642,6 @@ test "range literals" { } test "number literals decimal" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testTokenize("0", &.{.number_literal}); try testTokenize("1", &.{.number_literal}); try testTokenize("2", &.{.number_literal}); @@ -1897,8 +1890,6 @@ test "invalid token with unfinished escape right before eof" { } test "saturating operators" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - try testTokenize("<<", &.{.angle_bracket_angle_bracket_left}); try testTokenize("<<|", &.{.angle_bracket_angle_bracket_left_pipe}); try testTokenize("<<|=", &.{.angle_bracket_angle_bracket_left_pipe_equal}); diff --git a/src/Compilation.zig b/src/Compilation.zig index f51500cf43..a041a4b188 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1121,7 +1121,9 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { const include_compiler_rt = options.want_compiler_rt orelse needs_c_symbols; const must_single_thread = target_util.isSingleThreaded(options.target); - const single_threaded = options.single_threaded orelse must_single_thread; + const single_threaded = options.single_threaded orelse must_single_thread or + // x86_64 codegen doesn't support TLV for most object formats + (!use_llvm and options.target.cpu.arch == .x86_64 and options.target.ofmt != .macho); if (must_single_thread and !single_threaded) { return error.TargetRequiresSingleThreaded; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d923f32bdd..9d0a9e05b9 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -388,7 +388,7 @@ pub const MCValue = union(enum) { }; } - fn mem(mcv: MCValue, size: Memory.Size) Memory { + fn mem(mcv: MCValue, function: *Self, size: Memory.Size) !Memory { return switch (mcv) { .none, .unreach, @@ -409,7 +409,6 @@ pub const MCValue = union(enum) { .lea_frame, .reserved_frame, .air_ref, - .load_symbol, .lea_symbol, => unreachable, .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| .{ @@ -433,6 +432,19 @@ pub const MCValue = union(enum) { .disp = frame_addr.off, } }, }, + .load_symbol => |sym_off| { + assert(sym_off.off == 0); + return .{ + .base = .{ .reloc = .{ + .atom_index = try function.owner.getSymbolIndex(function), + .sym_index = sym_off.sym, + } }, + .mod = .{ .rm = .{ + .size = size, + .disp = sym_off.off, + } }, + }; + }, }; } @@ -722,12 +734,14 @@ 
const InstTracking = struct { const FrameAlloc = struct { abi_size: u31, + spill_pad: u3, abi_align: Alignment, ref_count: u16, - fn init(alloc_abi: struct { size: u64, alignment: Alignment }) FrameAlloc { + fn init(alloc_abi: struct { size: u64, pad: u3 = 0, alignment: Alignment }) FrameAlloc { return .{ .abi_size = @intCast(alloc_abi.size), + .spill_pad = alloc_abi.pad, .abi_align = alloc_abi.alignment, .ref_count = 0, }; @@ -738,6 +752,20 @@ const FrameAlloc = struct { .alignment = ty.abiAlignment(mod), }); } + fn initSpill(ty: Type, mod: *Module) FrameAlloc { + const abi_size = ty.abiSize(mod); + const spill_size = if (abi_size < 8) + math.ceilPowerOfTwoAssert(u64, abi_size) + else + std.mem.alignForward(u64, abi_size, 8); + return init(.{ + .size = spill_size, + .pad = @intCast(spill_size - abi_size), + .alignment = ty.abiAlignment(mod).maxStrict( + Alignment.fromNonzeroByteUnits(@min(spill_size, 8)), + ), + }); + } }; const StackAllocation = struct { @@ -1668,8 +1696,7 @@ fn gen(self: *Self) InnerError!void { // The address where to store the return value for the caller is in a // register which the callee is free to clobber. Therefore, we purposely // spill it to stack immediately. - const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(Type.usize, mod)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(Type.usize, mod)); try self.genSetMem( .{ .frame = frame_index }, 0, @@ -2434,7 +2461,7 @@ fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: b } } - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, mod)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ty, mod)); return .{ .load_frame = .{ .index = frame_index } }; } @@ -2445,7 +2472,10 @@ fn regClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet { 80 => abi.RegisterClass.x87, else => abi.RegisterClass.sse, }, - .Vector => abi.RegisterClass.sse, + .Vector => switch (ty.childType(mod).toIntern()) { + .bool_type => abi.RegisterClass.gp, + else => abi.RegisterClass.sse, + }, else => abi.RegisterClass.gp, }; } @@ -2699,7 +2729,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, - src_mcv.mem(.qword), + try src_mcv.mem(self, .qword), ) else try self.asmRegisterRegisterRegister( .{ .v_ss, .cvtsd2 }, dst_reg, @@ -2711,7 +2741,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ ._ss, .cvtsd2 }, dst_reg, - src_mcv.mem(.qword), + try src_mcv.mem(self, .qword), ) else try self.asmRegisterRegister( .{ ._ss, .cvtsd2 }, dst_reg, @@ -2798,7 +2828,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { .{ .v_sd, .cvtss2 }, dst_reg, dst_reg, - src_mcv.mem(.dword), + try src_mcv.mem(self, .dword), ) else try self.asmRegisterRegisterRegister( .{ .v_sd, .cvtss2 }, dst_reg, @@ -2810,7 +2840,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ ._sd, .cvtss2 }, dst_reg, - src_mcv.mem(.dword), + try src_mcv.mem(self, .dword), ) else try self.asmRegisterRegister( .{ ._sd, .cvtss2 }, dst_reg, @@ -2851,8 +2881,8 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { }; const dst_mcv = if (dst_int_info.bits <= src_storage_bits and - std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable == - std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and + math.divCeil(u16, dst_int_info.bits, 64) catch unreachable == + math.divCeil(u32, 
src_storage_bits, 64) catch unreachable and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(min_ty, dst_mcv, src_mcv); @@ -2869,22 +2899,28 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }; } - const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable; - const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; + const src_limbs_len = math.divCeil(u16, src_int_info.bits, 64) catch unreachable; + const dst_limbs_len = math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; - const high_mcv = dst_mcv.address().offset((src_limbs_len - 1) * 8).deref(); - const high_reg = try self.copyToTmpRegister(switch (src_int_info.signedness) { - .signed => Type.isize, - .unsigned => Type.usize, - }, high_mcv); + const high_mcv: MCValue = if (dst_mcv.isMemory()) + dst_mcv.address().offset((src_limbs_len - 1) * 8).deref() + else + .{ .register = dst_mcv.register_pair[1] }; + const high_reg = if (high_mcv.isRegister()) + high_mcv.getReg().? + else + try self.copyToTmpRegister(switch (src_int_info.signedness) { + .signed => Type.isize, + .unsigned => Type.usize, + }, high_mcv); const high_lock = self.register_manager.lockRegAssumeUnused(high_reg); defer self.register_manager.unlockReg(high_lock); const high_bits = src_int_info.bits % 64; if (high_bits > 0) { - const high_ty = try mod.intType(extend, high_bits); - try self.truncateRegister(high_ty, high_reg); - try self.genCopy(Type.usize, high_mcv, .{ .register = high_reg }); + try self.truncateRegister(src_ty, high_reg); + const high_ty = if (dst_int_info.bits >= 64) Type.usize else dst_ty; + try self.genCopy(high_ty, high_mcv, .{ .register = high_reg }); } if (dst_limbs_len > src_limbs_len) try self.genInlineMemset( @@ -2995,14 +3031,14 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { .{ .vp_, .@"and" }, dst_reg, dst_reg, - splat_addr_mcv.deref().mem(Memory.Size.fromSize(splat_abi_size)), + try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), ); try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg); } else { try self.asmRegisterMemory( .{ .p_, .@"and" }, dst_reg, - splat_addr_mcv.deref().mem(Memory.Size.fromSize(splat_abi_size)), + try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), ); try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg); } @@ -3048,7 +3084,7 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { const len = try self.resolveInst(bin_op.rhs); const len_ty = self.typeOf(bin_op.rhs); - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr); try self.genSetMem( .{ .frame = frame_index }, @@ -3068,8 +3104,36 @@ fn airUnOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { } fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs); + + const dst_ty = self.typeOfIndex(inst); + if (dst_ty.isAbiInt(mod)) { + const abi_size: u32 = @intCast(dst_ty.abiSize(mod)); + const bit_size: u32 = @intCast(dst_ty.bitSize(mod)); + if (abi_size * 8 > bit_size) { + const dst_lock 
= switch (dst_mcv) { + .register => |dst_reg| self.register_manager.lockRegAssumeUnused(dst_reg), + else => null, + }; + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + if (dst_mcv.isRegister()) { + try self.truncateRegister(dst_ty, dst_mcv.getReg().?); + } else { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const hi_ty = try mod.intType(.unsigned, @intCast((dst_ty.bitSize(mod) - 1) % 64 + 1)); + const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref(); + try self.genSetReg(tmp_reg, hi_ty, hi_mcv); + try self.truncateRegister(dst_ty, tmp_reg); + try self.genCopy(hi_ty, hi_mcv, .{ .register = tmp_reg }); + } + } + } return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -3176,7 +3240,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - mat_lhs_mcv.address().offset(8).deref().mem(.qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_reg, @@ -3200,7 +3264,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .xor }, tmp_reg, - mat_rhs_mcv.address().offset(8).deref().mem(.qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister( .{ ._, .xor }, tmp_reg, @@ -3300,12 +3364,12 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .{ ._, .add }, tmp_regs[0], - mat_rhs_mcv.mem(.qword), + try mat_rhs_mcv.mem(self, .qword), ); try self.asmRegisterMemory( .{ ._, .adc }, tmp_regs[1], - mat_rhs_mcv.address().offset(8).deref().mem(.qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), ); } else for ( [_]Mir.Inst.Tag{ .add, .adc }, @@ -3534,7 +3598,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - mat_lhs_mcv.address().offset(8).deref().mem(.qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_reg, @@ -3558,7 +3622,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .xor }, tmp_reg, - mat_rhs_mcv.address().offset(8).deref().mem(.qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister( .{ ._, .xor }, tmp_reg, @@ -3567,7 +3631,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, Immediate.u(63)); try self.asmRegister(.{ ._, .not }, tmp_reg); - try self.asmMemoryImmediate(.{ ._, .cmp }, overflow.mem(.dword), Immediate.s(0)); + try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .dword), Immediate.s(0)); try self.freeValue(overflow); try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg); try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, Immediate.u(63)); @@ -3665,7 +3729,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod)); try self.genSetMem( .{ .frame = frame_index }, 
@intCast(tuple_ty.structFieldOffset(1, mod)), @@ -3682,7 +3746,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod)); try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, @@ -3738,7 +3802,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod)); try self.genSetMem( .{ .frame = frame_index }, @intCast(tuple_ty.structFieldOffset(1, mod)), @@ -3755,7 +3819,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod)); try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, @@ -3874,7 +3938,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { ); try self.asmMemoryImmediate( .{ ._, .cmp }, - overflow.mem(self.memSize(Type.c_int)), + try overflow.mem(self, self.memSize(Type.c_int)), Immediate.s(0), ); try self.genSetMem( @@ -3926,14 +3990,19 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }; defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_lhs_mcv.isMemory()) - try self.asmRegisterMemory(.{ ._, .mov }, .rax, mat_lhs_mcv.mem(.qword)) - else - try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]); + if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( + .{ ._, .mov }, + .rax, + try mat_lhs_mcv.mem(self, .qword), + ) else try self.asmRegisterRegister( + .{ ._, .mov }, + .rax, + mat_lhs_mcv.register_pair[0], + ); if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_regs[0], - mat_rhs_mcv.address().offset(8).deref().mem(.qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_regs[0], @@ -3944,7 +4013,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax); try self.asmSetccRegister(.o, tmp_regs[2].to8()); if (mat_rhs_mcv.isMemory()) - try self.asmMemory(.{ ._, .mul }, mat_rhs_mcv.mem(.qword)) + try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword)) else try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); @@ -3953,7 +4022,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_regs[0], - mat_lhs_mcv.address().offset(8).deref().mem(.qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_regs[0], @@ -3967,14 +4036,15 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { tmp_regs[3].to8(), ); try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); - if (mat_rhs_mcv.isMemory()) - try self.asmRegisterMemory(.{ .i_, .mul }, tmp_regs[0], mat_rhs_mcv.mem(.qword)) - else - try self.asmRegisterRegister( - .{ .i_, .mul }, - tmp_regs[0], - mat_rhs_mcv.register_pair[0], - ); + if 
(mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( + .{ .i_, .mul }, + tmp_regs[0], + try mat_rhs_mcv.mem(self, .qword), + ) else try self.asmRegisterRegister( + .{ .i_, .mul }, + tmp_regs[0], + mat_rhs_mcv.register_pair[0], + ); try self.asmSetccRegister(.o, tmp_regs[2].to8()); try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); @@ -4020,8 +4090,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { self.eflags_inst = inst; break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; } else { - const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod)); try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, @@ -4032,8 +4101,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { src_ty.fmt(mod), dst_ty.fmt(mod), }); - const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod)); if (dst_info.bits >= lhs_active_bits + rhs_active_bits) { try self.genSetMem( .{ .frame = frame_index }, @@ -4106,7 +4174,7 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)), .memory, .indirect, .load_frame => try self.asmMemory( tag, - mat_rhs.mem(Memory.Size.fromSize(abi_size)), + try mat_rhs.mem(self, Memory.Size.fromSize(abi_size)), ), else => unreachable, } @@ -4160,8 +4228,8 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa ); try self.asmCmovccRegisterRegister( .z, - registerAlias(divisor, abi_size), - registerAlias(.rdx, abi_size), + registerAlias(divisor, @max(abi_size, 2)), + registerAlias(.rdx, @max(abi_size, 2)), ); try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax }); return MCValue{ .register = divisor }; @@ -4171,47 +4239,268 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void { const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - try self.spillRegisters(&.{.rcx}); - - const tag = self.air.instructions.items(.tag)[inst]; - try self.register_manager.getReg(.rcx, null); - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); + const air_tags = self.air.instructions.items(.tag); + const tag = air_tags[inst]; const lhs_ty = self.typeOf(bin_op.lhs); const rhs_ty = self.typeOf(bin_op.rhs); + const result: MCValue = result: { + switch (lhs_ty.zigTypeTag(mod)) { + .Int => { + try self.spillRegisters(&.{.rcx}); + try self.register_manager.getReg(.rcx, null); + const lhs_mcv = try self.resolveInst(bin_op.lhs); + const rhs_mcv = try self.resolveInst(bin_op.rhs); - const dst_mcv = try self.genShiftBinOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty); - switch (tag) { - .shr, .shr_exact, .shl_exact => {}, - .shl => switch (dst_mcv) { - .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg), - .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]), - .load_frame => |frame_addr| { - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer 
self.register_manager.unlockReg(tmp_lock); + const dst_mcv = try self.genShiftBinOp(tag, inst, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty); + switch (tag) { + .shr, .shr_exact, .shl_exact => {}, + .shl => switch (dst_mcv) { + .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg), + .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]), + .load_frame => |frame_addr| { + const tmp_reg = + try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); - const lhs_bits: u31 = @intCast(lhs_ty.bitSize(mod)); - const tmp_ty = if (lhs_bits > 64) Type.usize else lhs_ty; - const off = frame_addr.off + lhs_bits / 64 * 8; - try self.genSetReg( - tmp_reg, - tmp_ty, - .{ .load_frame = .{ .index = frame_addr.index, .off = off } }, - ); - try self.truncateRegister(lhs_ty, tmp_reg); - try self.genSetMem( - .{ .frame = frame_addr.index }, - off, - tmp_ty, - .{ .register = tmp_reg }, - ); + const lhs_bits: u31 = @intCast(lhs_ty.bitSize(mod)); + const tmp_ty = if (lhs_bits > 64) Type.usize else lhs_ty; + const off = frame_addr.off + (lhs_bits - 1) / 64 * 8; + try self.genSetReg( + tmp_reg, + tmp_ty, + .{ .load_frame = .{ .index = frame_addr.index, .off = off } }, + ); + try self.truncateRegister(lhs_ty, tmp_reg); + try self.genSetMem( + .{ .frame = frame_addr.index }, + off, + tmp_ty, + .{ .register = tmp_reg }, + ); + }, + else => {}, + }, + else => unreachable, + } + break :result dst_mcv; + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Int => if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.childType(mod).intInfo(mod).bits) { + else => null, + 16 => switch (lhs_ty.vectorLen(mod)) { + else => null, + 1...8 => switch (tag) { + else => unreachable, + .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_w, .sra } + else + .{ .p_w, .sra }, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_w, .srl } + else + .{ .p_w, .srl }, + }, + .shl, .shl_exact => if (self.hasFeature(.avx)) + .{ .vp_w, .sll } + else + .{ .p_w, .sll }, + }, + 9...16 => switch (tag) { + else => unreachable, + .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .sra } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .srl } else null, + }, + .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_w, .sll } else null, + }, + }, + 32 => switch (lhs_ty.vectorLen(mod)) { + else => null, + 1...4 => switch (tag) { + else => unreachable, + .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_d, .sra } + else + .{ .p_d, .sra }, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_d, .srl } + else + .{ .p_d, .srl }, + }, + .shl, .shl_exact => if (self.hasFeature(.avx)) + .{ .vp_d, .sll } + else + .{ .p_d, .sll }, + }, + 5...8 => switch (tag) { + else => unreachable, + .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .sra } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .srl } else null, + }, + .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_d, .sll } else null, + }, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + else => null, + 1...2 => switch (tag) { + else => unreachable, + .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed 
=> if (self.hasFeature(.avx)) + .{ .vp_q, .sra } + else + .{ .p_q, .sra }, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_q, .srl } + else + .{ .p_q, .srl }, + }, + .shl, .shl_exact => if (self.hasFeature(.avx)) + .{ .vp_q, .sll } + else + .{ .p_q, .sll }, + }, + 3...4 => switch (tag) { + else => unreachable, + .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_q, .sra } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_q, .srl } else null, + }, + .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_q, .sll } else null, + }, + }, + })) |mir_tag| if (try self.air.value(bin_op.rhs, mod)) |rhs_val| { + switch (mod.intern_pool.indexToKey(rhs_val.toIntern())) { + .aggregate => |rhs_aggregate| switch (rhs_aggregate.storage) { + .repeated_elem => |rhs_elem| { + const abi_size: u32 = @intCast(lhs_ty.abiSize(mod)); + + const lhs_mcv = try self.resolveInst(bin_op.lhs); + const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) + .{lhs_mcv.getReg().?} ** 2 + else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{ + try self.register_manager.allocReg(inst, abi.RegisterClass.sse), + lhs_mcv.getReg().?, + } else .{(try self.copyToRegisterWithInstTracking( + inst, + lhs_ty, + lhs_mcv, + )).register} ** 2; + const reg_locks = + self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg }); + defer for (reg_locks) |reg_lock| if (reg_lock) |lock| + self.register_manager.unlockReg(lock); + + const shift_imm = + Immediate.u(@intCast(rhs_elem.toValue().toUnsignedInt(mod))); + if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate( + mir_tag, + registerAlias(dst_reg, abi_size), + registerAlias(lhs_reg, abi_size), + shift_imm, + ) else { + assert(dst_reg.id() == lhs_reg.id()); + try self.asmRegisterImmediate( + mir_tag, + registerAlias(dst_reg, abi_size), + shift_imm, + ); + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + else => {}, + } + } else if (Air.refToIndex(bin_op.rhs)) |rhs_inst| switch (air_tags[rhs_inst]) { + .splat => { + const abi_size: u32 = @intCast(lhs_ty.abiSize(mod)); + + const lhs_mcv = try self.resolveInst(bin_op.lhs); + const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) + .{lhs_mcv.getReg().?} ** 2 + else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{ + try self.register_manager.allocReg(inst, abi.RegisterClass.sse), + lhs_mcv.getReg().?, + } else .{(try self.copyToRegisterWithInstTracking( + inst, + lhs_ty, + lhs_mcv, + )).register} ** 2; + const reg_locks = self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg }); + defer for (reg_locks) |reg_lock| if (reg_lock) |lock| + self.register_manager.unlockReg(lock); + + const shift_reg = + try self.copyToTmpRegister(rhs_ty, .{ .air_ref = bin_op.rhs }); + const shift_lock = self.register_manager.lockRegAssumeUnused(shift_reg); + defer self.register_manager.unlockReg(shift_lock); + + const mask_ty = try mod.vectorType(.{ .len = 16, .child = .u8_type }); + const mask_mcv = try self.genTypedValue(.{ + .ty = mask_ty, + .val = (try mod.intern(.{ .aggregate = .{ + .ty = mask_ty.toIntern(), + .storage = .{ .elems = &([1]InternPool.Index{ + (try rhs_ty.childType(mod).maxIntScalar(mod, Type.u8)).toIntern(), + } ++ [1]InternPool.Index{ + (try mod.intValue(Type.u8, 0)).toIntern(), + } ** 15) }, + } })).toValue(), + }); + const mask_addr_reg = + try self.copyToTmpRegister(Type.usize, mask_mcv.address()); + const 
mask_addr_lock = self.register_manager.lockRegAssumeUnused(mask_addr_reg); + defer self.register_manager.unlockReg(mask_addr_lock); + + if (self.hasFeature(.avx)) { + try self.asmRegisterRegisterMemory( + .{ .vp_, .@"and" }, + shift_reg.to128(), + shift_reg.to128(), + .{ + .base = .{ .reg = mask_addr_reg }, + .mod = .{ .rm = .{ .size = .xword } }, + }, + ); + try self.asmRegisterRegisterRegister( + mir_tag, + registerAlias(dst_reg, abi_size), + registerAlias(lhs_reg, abi_size), + shift_reg.to128(), + ); + } else { + try self.asmRegisterMemory( + .{ .p_, .@"and" }, + shift_reg.to128(), + .{ + .base = .{ .reg = mask_addr_reg }, + .mod = .{ .rm = .{ .size = .xword } }, + }, + ); + assert(dst_reg.id() == lhs_reg.id()); + try self.asmRegisterRegister( + mir_tag, + registerAlias(dst_reg, abi_size), + shift_reg.to128(), + ); + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + else => {}, }, else => {}, - }, - else => unreachable, - } - return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); + } + return self.fail("TODO implement airShlShrBinOp for {}", .{lhs_ty.fmt(mod)}); + }; + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } fn airShlSat(self: *Self, inst: Air.Inst.Index) !void { @@ -4230,12 +4519,18 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { const opt_mcv = try self.resolveInst(ty_op.operand); if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) { - switch (opt_mcv) { - .register => |reg| try self.truncateRegister(pl_ty, reg), - .register_overflow => |ro| try self.truncateRegister(pl_ty, ro.reg), + const pl_mcv: MCValue = switch (opt_mcv) { + .register_overflow => |ro| pl: { + self.eflags_inst = null; // actually stop tracking the overflow part + break :pl .{ .register = ro.reg }; + }, + else => opt_mcv, + }; + switch (pl_mcv) { + .register => |pl_reg| try self.truncateRegister(pl_ty, pl_reg), else => {}, } - break :result opt_mcv; + break :result pl_mcv; } const pl_mcv = try self.allocRegOrMem(inst, true); @@ -4472,8 +4767,9 @@ fn genUnwrapErrUnionPayloadMir( const eu_lock = self.register_manager.lockReg(reg); defer if (eu_lock) |lock| self.register_manager.unlockReg(lock); - const result_mcv: MCValue = if (maybe_inst) |inst| - try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union) + const payload_in_gp = self.regClassForType(payload_ty).supersetOf(abi.RegisterClass.gp); + const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null) + try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union) else .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) }; if (payload_off > 0) try self.genShiftBinOpMir( @@ -4482,7 +4778,12 @@ fn genUnwrapErrUnionPayloadMir( result_mcv, .{ .immediate = @as(u6, @intCast(payload_off * 8)) }, ) else try self.truncateRegister(payload_ty, result_mcv.register); - break :result result_mcv; + break :result if (payload_in_gp) + result_mcv + else if (maybe_inst) |inst| + try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv) + else + .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) }; }, else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {}", .{err_union}), } @@ -4593,7 +4894,7 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = result: { if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .{ .immediate = 0 }; - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod)); + const frame_index = try 
self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, mod)); const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod)); const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand); @@ -4615,7 +4916,7 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = result: { if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result try self.resolveInst(ty_op.operand); - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, mod)); const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod)); const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef); @@ -4770,14 +5071,19 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const slice_ty = self.typeOf(bin_op.lhs); - const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod); - const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs); - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.load(dst_mcv, slice_ptr_field_type, elem_ptr); + const result: MCValue = result: { + const elem_ty = self.typeOfIndex(inst); + if (!elem_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none; - return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); + const slice_ty = self.typeOf(bin_op.lhs); + const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod); + const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs); + const dst_mcv = try self.allocRegOrMem(inst, false); + try self.load(dst_mcv, slice_ptr_field_type, elem_ptr); + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { @@ -4810,11 +5116,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { }; defer if (index_lock) |lock| self.register_manager.unlockReg(lock); - const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size); - const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_reg_lock); - const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); + defer self.register_manager.unlockReg(addr_lock); + switch (array) { .register => { const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, mod)); @@ -4843,6 +5148,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { else => return self.fail("TODO implement array_elem_val when array is {}", .{array}), } + const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size); + const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg); + defer self.register_manager.unlockReg(offset_lock); + // TODO we could allocate register here, but need to expect addr register and potentially // offset register. 
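// Illustrative sketch (not part of the patch): the airSliceElemVal change above
// short-circuits to .none when the element type has no runtime bits, because
// indexing a slice of a zero-sized type leaves nothing to load. A hypothetical
// test of the language-level behavior the backend relies on:
const std = @import("std");

test "slice of zero-bit elements has nothing to load" {
    const storage = [_]void{ {}, {}, {}, {} };
    const slice: []const void = &storage;
    // The element "value" occupies no bytes, so codegen can skip the load entirely.
    try std.testing.expectEqual(@as(usize, 0), @sizeOf(@TypeOf(slice[2])));
}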
try self.spillEflagsIfOccupied(); @@ -5093,7 +5402,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { .{ ._, .sub }, dst_ty, dst_mcv, - .{ .immediate = 8 + self.regExtraBits(src_ty) }, + .{ .immediate = 32 - src_bits }, ); } else if (src_bits <= 64) { try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv); @@ -5361,7 +5670,9 @@ fn airPopCount(self: *Self, inst: Air.Inst.Index) !void { mat_src_mcv else .{ .register = mat_src_mcv.register_pair[0] }, false); - try self.genPopCount(tmp_regs[1], Type.usize, if (mat_src_mcv.isMemory()) + const src_info = src_ty.intInfo(mod); + const hi_ty = try mod.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1); + try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isMemory()) mat_src_mcv.address().offset(8).deref() else .{ .register = mat_src_mcv.register_pair[1] }, false); @@ -5383,9 +5694,13 @@ fn genPopCount( const src_abi_size: u32 = @intCast(src_ty.abiSize(mod)); if (self.hasFeature(.popcnt)) return self.genBinOpMir( .{ ._, .popcnt }, - if (src_abi_size > 1) src_ty else Type.u16, + if (src_abi_size > 1) src_ty else Type.u32, .{ .register = dst_reg }, - src_mcv, + if (src_abi_size > 1) src_mcv else src: { + if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv); + try self.truncateRegister(try src_ty.toUnsigned(mod), dst_reg); + break :src .{ .register = dst_reg }; + }, ); const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8); @@ -5517,9 +5832,9 @@ fn genByteSwap( try self.asmRegisterMemory( .{ ._, .movbe }, dst_regs[0], - src_mcv.address().offset(8).deref().mem(.qword), + try src_mcv.address().offset(8).deref().mem(self, .qword), ); - try self.asmRegisterMemory(.{ ._, .movbe }, dst_regs[1], src_mcv.mem(.qword)); + try self.asmRegisterMemory(.{ ._, .movbe }, dst_regs[1], try src_mcv.mem(self, .qword)); } else for (dst_regs, src_mcv.register_pair) |dst_reg, src_reg| { try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to64(), src_reg.to64()); try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); @@ -5762,7 +6077,7 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) else => unreachable, } }); const sign_mem: Memory = if (sign_mcv.isMemory()) - sign_mcv.mem(Memory.Size.fromSize(abi_size)) + try sign_mcv.mem(self, Memory.Size.fromSize(abi_size)) else .{ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) }, @@ -5945,7 +6260,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro mir_tag, dst_alias, dst_alias, - src_mcv.mem(Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), Immediate.u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterRegisterImmediate( mir_tag, @@ -5960,7 +6275,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( mir_tag, dst_alias, - src_mcv.mem(Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), Immediate.u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterImmediate( mir_tag, @@ -6000,7 +6315,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( .l, registerAlias(dst_mcv.register, cmov_abi_size), - src_mcv.mem(Memory.Size.fromSize(cmov_abi_size)), + try src_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)), ), else => { const val_reg = try self.copyToTmpRegister(ty, src_mcv); @@ -6100,7 +6415,7 @@ fn airAbs(self: 
*Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_alias, - src_mcv.mem(self.memSize(ty)), + try src_mcv.mem(self, self.memSize(ty)), ) else try self.asmRegisterRegister( mir_tag, dst_alias, @@ -6206,7 +6521,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, wide_reg, - src_mcv.mem(Memory.Size.fromSize( + try src_mcv.mem(self, Memory.Size.fromSize( @intCast(@divExact(wide_reg.bitSize(), 16)), )), ) else try self.asmRegisterRegister( @@ -6254,7 +6569,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { mir_tag, dst_reg, dst_reg, - src_mcv.mem(Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, @@ -6267,7 +6582,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { else => if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_reg, - src_mcv.mem(Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), ) else try self.asmRegisterRegister( mir_tag, dst_reg, @@ -6332,7 +6647,7 @@ fn reuseOperandAdvanced( return false; switch (mcv) { - .register, .register_pair => for (mcv.getRegs()) |reg| { + .register, .register_pair, .register_overflow => for (mcv.getRegs()) |reg| { // If it's in the registers table, need to associate the register(s) with the // new instruction. if (maybe_tracked_inst) |tracked_inst| { @@ -6346,6 +6661,10 @@ fn reuseOperandAdvanced( .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false, else => return false, } + switch (mcv) { + .eflags, .register_overflow => self.eflags_inst = maybe_tracked_inst, + else => {}, + } // Prevent the operand deaths processing code from deallocating it. 
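// Hedged illustration (example values are made up): the reuseOperandAdvanced
// change above lets a consumer of an overflow result take over both halves of a
// .register_overflow value, the wrapped result in a GPR plus the overflow bit
// still tracked in EFLAGS, instead of copying them. At the language level the
// pair looks like this:
const std = @import("std");

test "@addWithOverflow yields a wrapped value and an overflow bit" {
    const lhs: u8 = 200;
    const rhs: u8 = 100;
    const result = @addWithOverflow(lhs, rhs);
    try std.testing.expectEqual(@as(u8, 44), result[0]); // wrapped value (kept in a register)
    try std.testing.expectEqual(@as(u1, 1), result[1]); // overflow bit (tracked via EFLAGS)
}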
self.liveness.clearOperandDeath(inst, op_index); @@ -6363,11 +6682,36 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn if (!val_ty.hasRuntimeBitsIgnoreComptime(mod)) return; const val_abi_size: u32 = @intCast(val_ty.abiSize(mod)); + if (ptr_info.packed_offset.bit_offset % 8 == 0) { + try self.load( + dst_mcv, + ptr_ty, + ptr_mcv.offset(@intCast(@divExact(ptr_info.packed_offset.bit_offset, 8))), + ); + const val_bit_size: u32 = @intCast(val_ty.bitSize(mod)); + if (val_abi_size * 8 > val_bit_size) { + if (dst_mcv.isRegister()) { + try self.truncateRegister(val_ty, dst_mcv.getReg().?); + } else { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref(); + try self.genSetReg(tmp_reg, Type.usize, hi_mcv); + try self.truncateRegister(val_ty, tmp_reg); + try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg }); + } + } + return; + } + if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{val_ty.fmt(mod)}); const limb_abi_size: u32 = @min(val_abi_size, 8); const limb_abi_bits = limb_abi_size * 8; - const val_byte_off: i32 = @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size); + const val_byte_off: i32 = + @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size); const val_bit_off = ptr_info.packed_offset.bit_offset % limb_abi_bits; const val_extra_bits = self.regExtraBits(val_ty); @@ -6530,7 +6874,7 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(limb_abi_size), - .disp = src_byte_off + limb_i * limb_abi_bits, + .disp = src_byte_off + limb_i * limb_abi_size, } }, }; @@ -6575,6 +6919,22 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In limb_mem, registerAlias(tmp_reg, limb_abi_size), ); + } else if (src_bit_size <= 128 and src_bit_off == 0) { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) { + 0 => src_mcv, + else => src_mcv.address().offset(limb_i * limb_abi_size).deref(), + }); + try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask }); + try self.asmMemoryRegister( + .{ ._, .@"or" }, + limb_mem, + registerAlias(tmp_reg, limb_abi_size), + ); } else return self.fail("TODO: implement packed store of {}", .{src_ty.fmt(mod)}); } } @@ -6808,17 +7168,17 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { .register_overflow => |ro| { switch (index) { // Get wrapped value for overflow operation. - 0 => break :result if (self.liveness.operandDies(inst, 0)) - .{ .register = ro.reg } - else - try self.copyToRegisterWithInstTracking( - inst, - Type.usize, - .{ .register = ro.reg }, - ), + 0 => if (self.reuseOperand(inst, extra.struct_operand, 0, src_mcv)) { + self.eflags_inst = null; // actually stop tracking the overflow part + break :result .{ .register = ro.reg }; + } else break :result try self.copyToRegisterWithInstTracking( + inst, + Type.usize, + .{ .register = ro.reg }, + ), // Get overflow bit. 
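// Rough sketch (struct layout chosen only for illustration): the byte-aligned
// fast path added to packedLoad above applies when a packed field starts on a
// byte boundary. The backend can then issue a plain load at ptr + bit_offset/8
// and truncate the extra high bits, instead of shifting limbs. Field `b` below
// starts at bit offset 8:
const std = @import("std");

const Example = packed struct {
    a: u8, // bits 0..8
    b: u12, // bits 8..20, byte-aligned start, needs a 4-bit truncate after the load
    c: u12, // bits 20..32
};

test "byte-aligned packed field load" {
    const value = Example{ .a = 0xAA, .b = 0xBCD, .c = 0xEF0 };
    const field_ptr = &value.b; // packed-field pointer with bit_offset 8
    try std.testing.expectEqual(@as(u12, 0xBCD), field_ptr.*);
}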
- 1 => if (self.liveness.operandDies(inst, 0)) { - self.eflags_inst = inst; + 1 => if (self.reuseOperandAdvanced(inst, extra.struct_operand, 0, src_mcv, null)) { + self.eflags_inst = inst; // actually keep tracking the overflow part break :result .{ .eflags = ro.eflags }; } else { const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); @@ -6833,11 +7193,12 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (field_off % 8 == 0) { const off_mcv = src_mcv.address().offset(@intCast(@divExact(field_off, 8))).deref(); + const field_bit_size = field_ty.bitSize(mod); if (field_abi_size <= 8) { const int_ty = try mod.intType( if (field_ty.isAbiInt(mod)) field_ty.intInfo(mod).signedness else .unsigned, - @intCast(field_ty.bitSize(mod)), + @intCast(field_bit_size), ); const dst_reg = try self.register_manager.allocReg( @@ -6856,10 +7217,24 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); } - if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv; + const dst_mcv = if (self.reuseOperand(inst, operand, 0, src_mcv)) + off_mcv + else dst: { + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(field_ty, dst_mcv, off_mcv); + break :dst dst_mcv; + }; + if (field_abi_size * 8 > field_bit_size and dst_mcv.isMemory()) { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(field_ty, dst_mcv, off_mcv); + const hi_mcv = + dst_mcv.address().offset(@intCast(field_bit_size / 64 * 8)).deref(); + try self.genSetReg(tmp_reg, Type.usize, hi_mcv); + try self.truncateRegister(field_ty, tmp_reg); + try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg }); + } break :result dst_mcv; } @@ -7013,7 +7388,25 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv); } }, - .neg => try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv), + .neg => { + try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv); + const abi_size: u16 = @intCast(src_ty.abiSize(mod)); + const bit_size = src_ty.intInfo(mod).bits; + if (abi_size * 8 > bit_size) { + if (dst_mcv.isRegister()) { + try self.truncateRegister(src_ty, dst_mcv.getReg().?); + } else { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref(); + try self.genSetReg(tmp_reg, Type.usize, hi_mcv); + try self.truncateRegister(src_ty, tmp_reg); + try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg }); + } + } + }, else => unreachable, } return dst_mcv; @@ -7054,7 +7447,7 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MC }, .indirect, .load_frame => try self.asmMemory( mir_tag, - dst_mcv.mem(Memory.Size.fromSize(abi_size)), + try dst_mcv.mem(self, Memory.Size.fromSize(abi_size)), ), } } @@ -7552,27 +7945,27 @@ fn genMulDivBinOp( defer self.register_manager.unlockReg(tmp_lock); if (mat_lhs_mcv.isMemory()) - try self.asmRegisterMemory(.{ ._, .mov }, .rax, mat_lhs_mcv.mem(.qword)) + try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, 
.qword)) else try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]); if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - mat_rhs_mcv.address().offset(8).deref().mem(.qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]); try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax); if (mat_rhs_mcv.isMemory()) - try self.asmMemory(.{ ._, .mul }, mat_rhs_mcv.mem(.qword)) + try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword)) else try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg); if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - mat_lhs_mcv.address().offset(8).deref().mem(.qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]); if (mat_rhs_mcv.isMemory()) - try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.mem(.qword)) + try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .qword)) else try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg); @@ -7833,7 +8226,7 @@ fn genBinOp( .{ .vp_w, .insr }, dst_reg, dst_reg, - rhs_mcv.mem(.word), + try rhs_mcv.mem(self, .word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -7858,7 +8251,7 @@ fn genBinOp( mir_tag, dst_reg, dst_reg, - src_mcv.mem(Memory.Size.fromBitSize(float_bits)), + try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, @@ -7877,7 +8270,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_reg, - src_mcv.mem(Memory.Size.fromBitSize(float_bits)), + try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)), ) else try self.asmRegisterRegister( mir_tag, dst_reg, @@ -7919,12 +8312,18 @@ fn genBinOp( }; } - if ((lhs_ty.scalarType(mod).isRuntimeFloat() and + const sse_op = switch (lhs_ty.zigTypeTag(mod)) { + else => false, + .Float => true, + .Vector => switch (lhs_ty.childType(mod).toIntern()) { + .bool_type => false, + else => true, + }, + }; + if (sse_op and ((lhs_ty.scalarType(mod).isRuntimeFloat() and lhs_ty.scalarType(mod).floatBits(self.target.*) == 80) or - lhs_ty.abiSize(mod) > @as(u6, if (self.hasFeature(.avx)) 32 else 16)) - return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(air_tag), lhs_ty.fmt(mod), - }); + lhs_ty.abiSize(mod) > @as(u6, if (self.hasFeature(.avx)) 32 else 16))) + return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(mod) }); const maybe_mask_reg = switch (air_tag) { else => null, @@ -7941,10 +8340,16 @@ fn genBinOp( if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null; defer if (mask_lock) |lock| self.register_manager.unlockReg(lock); - const ordered_air = if (lhs_ty.isVector(mod) and lhs_ty.childType(mod).isAbiInt(mod) and - switch (air_tag) { - .cmp_lt, .cmp_gte => true, - else => false, + const ordered_air = if (lhs_ty.isVector(mod) and switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Int => switch (air_tag) { + .cmp_lt, .cmp_gte => true, + else => false, + }, + .Float => switch (air_tag) { + .cmp_gte, .cmp_gt => true, + else => false, + }, + else => unreachable, }) .{ 
.lhs = rhs_air, .rhs = lhs_air } else .{ .lhs = lhs_air, .rhs = rhs_air }; const lhs_mcv = try self.resolveInst(ordered_air.lhs); @@ -7971,14 +8376,12 @@ fn genBinOp( .xor, .min, .max, + .cmp_eq, + .cmp_neq, => true, else => false, }; - const vec_op = switch (lhs_ty.zigTypeTag(mod)) { - else => false, - .Float, .Vector => true, - }; const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) { .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null }, @@ -8000,23 +8403,23 @@ fn genBinOp( var flipped = false; var copied_to_dst = true; const dst_mcv: MCValue = dst: { + const tracked_inst = switch (air_tag) { + else => maybe_inst, + .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null, + }; if (maybe_inst) |inst| { - const tracked_inst = switch (air_tag) { - else => inst, - .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null, - }; - if ((!vec_op or lhs_mcv.isRegister()) and + if ((!sse_op or lhs_mcv.isRegister()) and self.reuseOperandAdvanced(inst, ordered_air.lhs, 0, lhs_mcv, tracked_inst)) break :dst lhs_mcv; - if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and + if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and self.reuseOperandAdvanced(inst, ordered_air.rhs, 1, rhs_mcv, tracked_inst)) { flipped = true; break :dst rhs_mcv; } } - const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx)) + const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, tracked_inst, true); + if (sse_op and lhs_mcv.isRegister() and self.hasFeature(.avx)) copied_to_dst = false else try self.genCopy(lhs_ty, dst_mcv, lhs_mcv); @@ -8046,7 +8449,7 @@ fn genBinOp( }; defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock); - if (!vec_op) { + if (!sse_op) { switch (air_tag) { .add, .add_wrap, @@ -8130,17 +8533,25 @@ fn genBinOp( try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]); if (src_mcv.isMemory()) { - try self.asmRegisterMemory(.{ ._, .cmp }, dst_regs[0], src_mcv.mem(.qword)); + try self.asmRegisterMemory( + .{ ._, .cmp }, + dst_regs[0], + try src_mcv.mem(self, .qword), + ); try self.asmRegisterMemory( .{ ._, .sbb }, tmp_reg, - src_mcv.address().offset(8).deref().mem(.qword), + try src_mcv.address().offset(8).deref().mem(self, .qword), + ); + try self.asmCmovccRegisterMemory( + cc, + dst_regs[0], + try src_mcv.mem(self, .qword), ); - try self.asmCmovccRegisterMemory(cc, dst_regs[0], src_mcv.mem(.qword)); try self.asmCmovccRegisterMemory( cc, dst_regs[1], - src_mcv.address().offset(8).deref().mem(.qword), + try src_mcv.address().offset(8).deref().mem(self, .qword), ); } else { try self.asmRegisterRegister( @@ -8292,7 +8703,7 @@ fn genBinOp( .{ .vp_w, .insr }, dst_reg, dst_reg, - src_mcv.mem(.word), + try src_mcv.mem(self, .word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -8738,7 +9149,7 @@ fn genBinOp( .{ .vp_w, .insr }, dst_reg, dst_reg, - src_mcv.mem(.word), + try src_mcv.mem(self, .word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -8784,7 +9195,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ .vp_d, .insr }, dst_reg, - src_mcv.mem(.dword), + try src_mcv.mem(self, .dword), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( .{ .v_ps, .unpckl }, @@ -8836,7 +9247,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, - src_mcv.mem(.qword), + try 
src_mcv.mem(self, .qword), ) else try self.asmRegisterRegister( .{ .v_ps, .cvtph2 }, tmp_reg, @@ -8879,7 +9290,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, - src_mcv.mem(.xword), + try src_mcv.mem(self, .xword), ) else try self.asmRegisterRegister( .{ .v_ps, .cvtph2 }, tmp_reg, @@ -8925,6 +9336,13 @@ fn genBinOp( => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, + .cmp_lt, + .cmp_lte, + .cmp_eq, + .cmp_gte, + .cmp_gt, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .v_ss, .cmp } else .{ ._ss, .cmp }, else => unreachable, }, 2...4 => switch (air_tag) { @@ -8938,6 +9356,13 @@ fn genBinOp( => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min }, + .cmp_lt, + .cmp_lte, + .cmp_eq, + .cmp_gte, + .cmp_gt, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .v_ps, .cmp } else .{ ._ps, .cmp }, else => unreachable, }, 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { @@ -8947,6 +9372,7 @@ fn genBinOp( .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .min }, + .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp }, else => unreachable, } else null, else => null, @@ -8963,6 +9389,13 @@ fn genBinOp( => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, + .cmp_lt, + .cmp_lte, + .cmp_eq, + .cmp_gte, + .cmp_gt, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .v_sd, .cmp } else .{ ._sd, .cmp }, else => unreachable, }, 2 => switch (air_tag) { @@ -8976,6 +9409,13 @@ fn genBinOp( => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min }, + .cmp_lt, + .cmp_lte, + .cmp_eq, + .cmp_gte, + .cmp_gt, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .v_pd, .cmp } else .{ ._pd, .cmp }, else => unreachable, }, 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { @@ -8984,6 +9424,7 @@ fn genBinOp( .mul => .{ .v_pd, .mul }, .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, .max => .{ .v_pd, .max }, + .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp }, .min => .{ .v_pd, .min }, else => unreachable, } else null, @@ -9004,43 +9445,96 @@ fn genBinOp( const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null; defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock); - if (self.hasFeature(.avx)) { - const lhs_reg = - if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); - if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - mir_tag, - dst_reg, - lhs_reg, - src_mcv.mem(switch (lhs_ty.zigTypeTag(mod)) { - else => Memory.Size.fromSize(abi_size), - .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()), - }), - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_reg, - lhs_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), - ); - } else { - assert(copied_to_dst); - if (src_mcv.isMemory()) try self.asmRegisterMemory( - mir_tag, - dst_reg, - src_mcv.mem(switch (lhs_ty.zigTypeTag(mod)) { - else => Memory.Size.fromSize(abi_size), - .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()), - }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), - ); + switch (mir_tag[1]) { + else => if (self.hasFeature(.avx)) { + const lhs_reg = + if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + dst_reg, + lhs_reg, + try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) { + else => Memory.Size.fromSize(abi_size), + .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + }), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_reg, + lhs_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + } else { + assert(copied_to_dst); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_reg, + try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) { + else => Memory.Size.fromSize(abi_size), + .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + }), + ) else try self.asmRegisterRegister( + mir_tag, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + }, + .cmp => { + const imm = Immediate.u(switch (air_tag) { + .cmp_eq => 0, + .cmp_lt, .cmp_gt => 1, + .cmp_lte, .cmp_gte => 2, + .cmp_neq => 4, + else => unreachable, + }); + if (self.hasFeature(.avx)) { + const lhs_reg = + if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + mir_tag, + dst_reg, + lhs_reg, + try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) { + else => Memory.Size.fromSize(abi_size), + .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + }), + imm, + ) else try self.asmRegisterRegisterRegisterImmediate( + mir_tag, + dst_reg, + lhs_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + imm, + ); + } else { + assert(copied_to_dst); + if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + mir_tag, + dst_reg, + try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) { + else => Memory.Size.fromSize(abi_size), + .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + }), + imm, + ) else try self.asmRegisterRegisterImmediate( + mir_tag, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + imm, + ); + } + }, } switch (air_tag) { @@ -9281,48 +9775,46 @@ fn genBinOp( ); } }, - .cmp_lt, - .cmp_lte, - .cmp_eq, - .cmp_gte, - .cmp_gt, - .cmp_neq, - => { - switch (air_tag) { - .cmp_lt, - .cmp_eq, - .cmp_gt, - => {}, - .cmp_lte, - .cmp_gte, - .cmp_neq, - => { - const unsigned_ty = try lhs_ty.toUnsigned(mod); - const not_mcv = try self.genTypedValue(.{ - .ty = lhs_ty, - .val = try unsigned_ty.maxInt(mod, unsigned_ty), - }); - const not_mem: Memory = if (not_mcv.isMemory()) - not_mcv.mem(Memory.Size.fromSize(abi_size)) - else - .{ .base = .{ - .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()), - }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } } }; - switch (mir_tag[0]) { - .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory( - .{ .vp_, .xor }, - dst_reg, - dst_reg, - not_mem, - ), - .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory( - .{ .p_, .xor }, - dst_reg, - not_mem, - ), - else => unreachable, - } + .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => { + switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Int => switch (air_tag) { + .cmp_lt, + .cmp_eq, + .cmp_gt, + => {}, + .cmp_lte, + .cmp_gte, + .cmp_neq, + => { + const unsigned_ty = try lhs_ty.toUnsigned(mod); + const not_mcv = try self.genTypedValue(.{ + .ty = lhs_ty, + .val = try unsigned_ty.maxInt(mod, unsigned_ty), + }); + const not_mem: Memory = if (not_mcv.isMemory()) + try not_mcv.mem(self, Memory.Size.fromSize(abi_size)) + else + .{ .base = .{ + .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()), + }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } } }; + switch (mir_tag[0]) { + .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory( + .{ .vp_, .xor }, + dst_reg, + dst_reg, + not_mem, + ), + .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory( + .{ .p_, .xor }, + dst_reg, + not_mem, + ), + else => unreachable, + } + }, + else => unreachable, }, + .Float => {}, else => unreachable, } @@ -9331,8 +9823,12 @@ fn genBinOp( defer self.register_manager.unlockReg(gp_lock); try self.asmRegisterRegister(switch (mir_tag[0]) { - .vp_b, .vp_d, .vp_q, .vp_w => .{ .vp_b, .movmsk }, + ._pd, ._sd => .{ ._pd, .movmsk }, + ._ps, ._ss => .{ ._ps, .movmsk }, .p_b, .p_d, .p_q, .p_w => .{ .p_b, .movmsk }, + .v_pd, .v_sd => .{ .v_pd, .movmsk }, + .v_ps, .v_ss => .{ .v_ps, .movmsk }, + .vp_b, .vp_d, .vp_q, .vp_w => .{ .vp_b, .movmsk }, else => unreachable, }, gp_reg.to32(), dst_reg); return .{ .register = gp_reg }; @@ -9459,13 +9955,13 @@ fn genBinOpMir( .load_frame, .lea_frame, => { - blk: { - return self.asmRegisterMemory(mir_limb_tag, dst_alias, switch (src_mcv) { + direct: { + try self.asmRegisterMemory(mir_limb_tag, dst_alias, switch (src_mcv) { .memory => |addr| .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(limb_abi_size), - .disp = math.cast(i32, addr + off) orelse break :blk, + .disp = math.cast(i32, addr + off) orelse break :direct, } }, }, .indirect => |reg_off| .{ @@ -9482,8 +9978,9 @@ fn genBinOpMir( .disp = frame_addr.off + off, } }, }, - else => break :blk, + else => break :direct, }); + continue; } switch (src_mcv) { @@ -10180,7 +10677,7 @@ fn genCall(self: *Self, info: union(enum) { .none, .unreach => {}, .indirect => |reg_off| { const ret_ty = fn_info.return_type.toType(); - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, mod)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ret_ty, mod)); try 
self.genSetReg(reg_off.reg, Type.usize, .{ .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }); @@ -10306,19 +10803,20 @@ fn genCall(self: *Self, info: union(enum) { fn airRet(self: *Self, inst: Air.Inst.Index) !void { const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; - const operand = try self.resolveInst(un_op); const ret_ty = self.fn_type.fnReturnType(mod); switch (self.ret_mcv.short) { .none => {}, - .register, .register_pair => try self.genCopy(ret_ty, self.ret_mcv.short, operand), + .register, + .register_pair, + => try self.genCopy(ret_ty, self.ret_mcv.short, .{ .air_ref = un_op }), .indirect => |reg_off| { try self.register_manager.getReg(reg_off.reg, null); const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg); defer self.register_manager.unlockReg(lock); try self.genSetReg(reg_off.reg, Type.usize, self.ret_mcv.long); - try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, operand); + try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, .{ .air_ref = un_op }); }, else => unreachable, } @@ -10593,7 +11091,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const locks = self.register_manager.lockRegsAssumeUnused(2, regs); defer for (locks) |lock| self.register_manager.unlockReg(lock); - const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable; + const limbs_len = math.divCeil(u16, abi_size, 8) catch unreachable; var limb_i: u16 = 0; while (limb_i < limbs_len) : (limb_i += 1) { const off = limb_i * 8; @@ -10688,7 +11186,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { .{ .vp_w, .insr }, tmp1_reg, dst_reg.to128(), - src_mcv.mem(.word), + try src_mcv.mem(self, .word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -10892,8 +11390,8 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index { }, .register => |reg| { try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1)); - return self.asmJccReloc(.e, undefined); + try self.asmRegisterImmediate(.{ ._, .@"test" }, reg.to8(), Immediate.u(1)); + return self.asmJccReloc(.z, undefined); }, .immediate, .load_frame, @@ -11433,12 +11931,12 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) { if (first_br) break :result src_mcv; - if (block_tracking.getReg()) |block_reg| + for (block_tracking.getRegs()) |block_reg| try self.register_manager.getReg(block_reg, br.block_inst); // .long = .none to avoid merging operand and block result stack frames. 
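// Hedged example (function and values are invented): the getRegs() loops above
// generalize the airBr reuse path from a single register to multi-register
// values, e.g. a block whose result is a u128 held in a register pair on
// x86_64. A minimal source pattern that exercises it:
const std = @import("std");

fn pick(cond: bool, a: u128, b: u128) u128 {
    return blk: {
        if (cond) break :blk a;
        break :blk b;
    };
}

test "block result held in a register pair" {
    const big: u128 = @as(u128, 1) << 100;
    try std.testing.expectEqual(big, pick(true, big, 0));
    try std.testing.expectEqual(@as(u128, 0), pick(false, big, 0));
}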
var current_tracking = InstTracking{ .long = .none, .short = src_mcv }; try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*); - if (src_mcv.getReg()) |src_reg| self.register_manager.freeReg(src_reg); + for (src_mcv.getRegs()) |src_reg| self.register_manager.freeReg(src_reg); break :result block_tracking.short; } @@ -12177,16 +12675,87 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .general_purpose, .segment => return .{ .move = .{ ._, .mov } }, .x87 => return .x87_load_store, .mmx => {}, - .sse => { - switch (ty.zigTypeTag(mod)) { - else => { - const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none); - assert(std.mem.indexOfNone(abi.Class, classes, &.{ - .integer, .sse, .float, .float_combine, - }) == null); - const abi_size = ty.abiSize(mod); - if (abi_size < 4 or - std.mem.indexOfScalar(abi.Class, classes, .integer) != null) switch (abi_size) { + .sse => switch (ty.zigTypeTag(mod)) { + else => { + const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none); + assert(std.mem.indexOfNone(abi.Class, classes, &.{ + .integer, .sse, .float, .float_combine, + }) == null); + const abi_size = ty.abiSize(mod); + if (abi_size < 4 or + std.mem.indexOfScalar(abi.Class, classes, .integer) != null) switch (abi_size) { + 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{ + .insert = .{ .vp_b, .insr }, + .extract = .{ .vp_b, .extr }, + } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{ + .insert = .{ .p_b, .insr }, + .extract = .{ .p_b, .extr }, + } }, + 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ ._d, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 9...16 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 17...32 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + } else switch (abi_size) { + 4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 9...16 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } + else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, + 17...32 => if (self.hasFeature(.avx)) return .{ .move = if (aligned) + .{ .v_pd, .mova } + else + .{ .v_pd, .movu } }, + else => {}, + } + }, + .Float => switch (ty.floatBits(self.target.*)) { + 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 32 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 64 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 128 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + else => 
{}, + }, + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Bool => {}, + .Int => switch (ty.childType(mod).intInfo(mod).bits) { + 8 => switch (ty.vectorLen(mod)) { 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{ .insert = .{ .vp_b, .insr }, .extract = .{ .vp_b, .extr }, @@ -12213,242 +12782,169 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 17...32 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, else => {}, - } else switch (abi_size) { - 4 => return .{ .move = if (self.hasFeature(.avx)) + }, + 16 => switch (ty.vectorLen(mod)) { + 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ ._d, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 9...16 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, + else => {}, + }, + 32 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ ._d, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 5...8 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, + else => {}, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 3...4 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, + else => {}, + }, + 128 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 2 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, + else => {}, + }, + 256 => switch (ty.vectorLen(mod)) { + 1 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, + else => {}, + }, + else => {}, + }, + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 16 => switch (ty.vectorLen(mod)) { + 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ 
._d, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 9...16 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, + else => {}, + }, + 32 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) + 2 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } }, - 9...16 => return .{ .move = if (self.hasFeature(.avx)) + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + 5...8 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_ps, .mova } + else + .{ .v_ps, .movu } }, + else => {}, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, - 17...32 => if (self.hasFeature(.avx)) return .{ .move = if (aligned) - .{ .v_pd, .mova } - else - .{ .v_pd, .movu } }, - else => {}, - } - }, - .Float => switch (ty.floatBits(self.target.*)) { - 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 32 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ss, .mov } - else - .{ ._ss, .mov } }, - 64 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } - else - .{ ._sd, .mov } }, - 128 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, - else => {}, - }, - .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { - .Bool => return .{ .move = .{ ._, .mov } }, - .Int => switch (ty.childType(mod).intInfo(mod).bits) { - 8 => switch (ty.vectorLen(mod)) { - 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{ - .insert = .{ .vp_b, .insr }, - .extract = .{ .vp_b, .extr }, - } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{ - .insert = .{ .p_b, .insr }, - .extract = .{ .p_b, .extr }, - } }, - 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } + 3...4 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_pd, .mova } else - .{ ._d, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 9...16 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, - 17...32 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, - else => {}, - }, - 16 => switch (ty.vectorLen(mod)) { 
-                    1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
-                        .insert = .{ .vp_w, .insr },
-                        .extract = .{ .vp_w, .extr },
-                    } } else .{ .insert_extract = .{
-                        .insert = .{ .p_w, .insr },
-                        .extract = .{ .p_w, .extr },
-                    } },
-                    2 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_d, .mov }
-                    else
-                        .{ ._d, .mov } },
-                    3...4 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_q, .mov }
-                    else
-                        .{ ._q, .mov } },
-                    5...8 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
-                    else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
-                    9...16 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_, .movdqa }
-                        else
-                            .{ .v_, .movdqu } },
-                    else => {},
-                },
-                32 => switch (ty.vectorLen(mod)) {
-                    1 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_d, .mov }
-                    else
-                        .{ ._d, .mov } },
-                    2 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_q, .mov }
-                    else
-                        .{ ._q, .mov } },
-                    3...4 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
-                    else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
-                    5...8 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_, .movdqa }
-                        else
-                            .{ .v_, .movdqu } },
-                    else => {},
-                },
-                64 => switch (ty.vectorLen(mod)) {
-                    1 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_q, .mov }
-                    else
-                        .{ ._q, .mov } },
-                    2 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
-                    else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
-                    3...4 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_, .movdqa }
-                        else
-                            .{ .v_, .movdqu } },
-                    else => {},
-                },
-                128 => switch (ty.vectorLen(mod)) {
-                    1 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
-                    else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
-                    2 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_, .movdqa }
-                        else
-                            .{ .v_, .movdqu } },
-                    else => {},
-                },
-                256 => switch (ty.vectorLen(mod)) {
-                    1 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_, .movdqa }
-                        else
-                            .{ .v_, .movdqu } },
-                    else => {},
-                },
+                            .{ .v_pd, .movu } },
                     else => {},
                 },
-            .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
-                16 => switch (ty.vectorLen(mod)) {
-                    1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
-                        .insert = .{ .vp_w, .insr },
-                        .extract = .{ .vp_w, .extr },
-                    } } else .{ .insert_extract = .{
-                        .insert = .{ .p_w, .insr },
-                        .extract = .{ .p_w, .extr },
-                    } },
-                    2 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_d, .mov }
+                128 => switch (ty.vectorLen(mod)) {
+                    1 => return .{ .move = if (self.hasFeature(.avx))
+                        if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                    else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+                    2 => if (self.hasFeature(.avx))
+                        return .{ .move = if (aligned)
+                            .{ .v_, .movdqa }
                         else
-                        .{ ._d, .mov } },
-                    3...4 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_q, .mov }
-                    else
-                        .{ ._q, .mov } },
-                    5...8 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
-                    else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
-                    9...16 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_, .movdqa }
-                        else
-                            .{ .v_, .movdqu } },
-                    else => {},
-                },
-                32 => switch (ty.vectorLen(mod)) {
-                    1 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_ss, .mov }
-                    else
-                        .{ ._ss, .mov } },
-                    2 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_sd, .mov }
-                    else
-                        .{ ._sd, .mov } },
-                    3...4 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
-                    5...8 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_ps, .mova }
-                        else
-                            .{ .v_ps, .movu } },
-                    else => {},
-                },
-                64 => switch (ty.vectorLen(mod)) {
-                    1 => return .{ .move = if (self.hasFeature(.avx))
-                        .{ .v_sd, .mov }
-                    else
-                        .{ ._sd, .mov } },
-                    2 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
-                    else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
-                    3...4 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_pd, .mova }
-                        else
-                            .{ .v_pd, .movu } },
-                    else => {},
-                },
-                128 => switch (ty.vectorLen(mod)) {
-                    1 => return .{ .move = if (self.hasFeature(.avx))
-                        if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
-                    else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
-                    2 => if (self.hasFeature(.avx))
-                        return .{ .move = if (aligned)
-                            .{ .v_, .movdqa }
-                        else
-                            .{ .v_, .movdqu } },
-                    else => {},
-                },
+                            .{ .v_, .movdqu } },
                     else => {},
                 },
                 else => {},
             },
-        }
+            else => {},
+        },
         },
     }
     return self.fail("TODO moveStrategy for {}", .{ty.fmt(mod)});
@@ -12514,32 +13010,18 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError
             };
             defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
-            const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
-            for (dst_regs, classes, 0..) |dst_reg, class, dst_reg_i| {
-                const class_ty = switch (class) {
-                    .integer => Type.usize,
-                    .sse, .float, .float_combine => Type.f64,
+            var part_disp: i32 = 0;
+            for (dst_regs, try self.splitType(ty), 0..) |dst_reg, dst_ty, part_i| {
+                try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) {
+                    .register_pair => |src_regs| .{ .register = src_regs[part_i] },
+                    .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(),
+                    .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{
+                        .reg = src_info.?.addr_reg,
+                        .off = part_disp,
+                    } },
                     else => unreachable,
-                };
-                const off: i32 = @intCast(dst_reg_i * 8);
-                switch (src_mcv) {
-                    .register_pair => |src_regs| try self.genSetReg(
-                        dst_reg,
-                        class_ty,
-                        .{ .register = src_regs[dst_reg_i] },
-                    ),
-                    .memory, .indirect, .load_frame => try self.genSetReg(
-                        dst_reg,
-                        class_ty,
-                        src_mcv.address().offset(off).deref(),
-                    ),
-                    .load_symbol, .load_direct, .load_got, .load_tlv => try self.genSetReg(
-                        dst_reg,
-                        class_ty,
-                        .{ .indirect = .{ .reg = src_info.?.addr_reg, .off = off } },
-                    ),
-                    else => unreachable,
-                }
+                });
+                part_disp += @intCast(dst_ty.abiSize(mod));
             }
         },
         .indirect => |reg_off| try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ty, src_mcv),
@@ -12584,6 +13066,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
             if (imm == 0) {
                 // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
                 // register is the fastest way to zero a register.
+                try self.spillEflagsIfOccupied();
                 try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
             } else if (abi_size > 4 and math.cast(u32, imm) != null) {
                 // 32-bit moves zero-extend to 64-bit.
@@ -12933,44 +13416,65 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
         .eflags => |cc| try self.asmSetccMemory(cc, .{ .base = base, .mod = .{
            .rm = .{ .size = .byte, .disp = disp },
         } }),
-        .register => |src_reg| try (try self.moveStrategy(ty, src_reg.class(), switch (base) {
-            .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
-            .reg => |reg| switch (reg) {
-                .es, .cs, .ss, .ds => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
-                else => false,
-            },
-            .frame => |frame_index| self.getFrameAddrAlignment(
-                .{ .index = frame_index, .off = disp },
-            ).compare(.gte, ty.abiAlignment(mod)),
-            .reloc => false,
-        })).write(
-            self,
-            .{ .base = base, .mod = .{ .rm = .{
-                .size = self.memSize(ty),
-                .disp = disp,
-            } } },
-            registerAlias(src_reg, abi_size),
-        ),
-        .register_pair => |src_regs| for (src_regs, 0..) |src_reg, src_reg_i| {
-            const part_size: u16 = @min(abi_size - src_reg_i * 8, 8);
-            try (try self.moveStrategy(
-                try mod.intType(.unsigned, part_size * 8),
-                src_reg.class(),
-                switch (base) {
-                    .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
-                    .reg => |reg| switch (reg) {
-                        .es, .cs, .ss, .ds => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
-                        else => false,
-                    },
-                    .frame => |frame_index| self.getFrameAddrAlignment(
-                        .{ .index = frame_index, .off = disp },
-                    ).compare(.gte, ty.abiAlignment(mod)),
-                    .reloc => false,
+        .register => |src_reg| {
+            const mem_size = switch (base) {
+                .frame => |base_fi| mem_size: {
+                    assert(disp >= 0);
+                    const frame_abi_size = self.frame_allocs.items(.abi_size)[@intFromEnum(base_fi)];
+                    const frame_spill_pad = self.frame_allocs.items(.spill_pad)[@intFromEnum(base_fi)];
+                    assert(frame_abi_size - frame_spill_pad - disp >= abi_size);
+                    break :mem_size if (frame_abi_size - frame_spill_pad - disp == abi_size)
+                        frame_abi_size
+                    else
+                        abi_size;
                 },
-            )).write(self, .{ .base = base, .mod = .{ .rm = .{
-                .size = Memory.Size.fromSize(part_size),
-                .disp = disp + @as(i32, @intCast(src_reg_i * 8)),
-            } } }, registerAlias(src_reg, part_size));
+                else => abi_size,
+            };
+            const src_alias = registerAlias(src_reg, abi_size);
+            const src_size: u32 = @intCast(switch (src_alias.class()) {
+                .general_purpose, .segment, .x87 => @divExact(src_alias.bitSize(), 8),
+                .mmx, .sse => abi_size,
+            });
+            if (src_size > mem_size) {
+                const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{
+                    .size = src_size,
+                    .alignment = Alignment.fromNonzeroByteUnits(src_size),
+                }));
+                const frame_mcv: MCValue = .{ .load_frame = .{ .index = frame_index } };
+                try (try self.moveStrategy(ty, src_alias.class(), true)).write(
+                    self,
+                    .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{
+                        .size = Memory.Size.fromSize(src_size),
+                    } } },
+                    src_alias,
+                );
+                try self.genSetMem(base, disp, ty, frame_mcv);
+                try self.freeValue(frame_mcv);
+            } else try (try self.moveStrategy(ty, src_alias.class(), switch (base) {
+                .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
+                .reg => |reg| switch (reg) {
+                    .es, .cs, .ss, .ds => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
+                    else => false,
+                },
+                .frame => |frame_index| self.getFrameAddrAlignment(
+                    .{ .index = frame_index, .off = disp },
+                ).compare(.gte, ty.abiAlignment(mod)),
+                .reloc => false,
+            })).write(
+                self,
+                .{ .base = base, .mod = .{ .rm = .{
+                    .size = self.memSize(ty),
+                    .disp = disp,
+                } } },
+                src_alias,
+            );
+        },
+        .register_pair => |src_regs| {
+            var part_disp: i32 = disp;
+            for (try self.splitType(ty), src_regs) |src_ty, src_reg| {
+                try self.genSetMem(base, part_disp, src_ty, .{ .register = src_reg });
+                part_disp += @intCast(src_ty.abiSize(mod));
+            }
         },
         .register_overflow => |ro| switch (ty.zigTypeTag(mod)) {
             .Struct => {
@@ -13226,50 +13730,43 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
         const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-        const dst_mcv = if (dst_rc.supersetOf(src_rc) and
-            self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
-            src_mcv
-        else dst: {
+        const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(mod) <= src_ty.abiSize(mod) and
+            self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
             const dst_mcv = try self.allocRegOrMem(inst, true);
-            try self.genCopy(
-                if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty,
-                dst_mcv,
-                src_mcv,
-            );
+            try self.genCopy(switch (math.order(dst_ty.abiSize(mod), src_ty.abiSize(mod))) {
+                .lt => dst_ty,
+                .eq => if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty,
+                .gt => src_ty,
+            }, dst_mcv, src_mcv);
             break :dst dst_mcv;
         };
         if (dst_ty.isRuntimeFloat()) break :result dst_mcv;
-        const dst_signedness =
-            if (dst_ty.isAbiInt(mod)) dst_ty.intInfo(mod).signedness else .unsigned;
-        if (!src_ty.isRuntimeFloat() or src_ty.floatBits(self.target.*) != 80) {
-            const src_signedness =
-                if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned;
-            if (dst_signedness == src_signedness) break :result dst_mcv;
-        }
+        if (dst_ty.isAbiInt(mod) and src_ty.isAbiInt(mod) and
+            dst_ty.intInfo(mod).signedness == src_ty.intInfo(mod).signedness) break :result dst_mcv;
-        const abi_size: u16 = @intCast(dst_ty.abiSize(mod));
-        const bit_size: u16 = @intCast(dst_ty.bitSize(mod));
-        if (abi_size * 8 <= bit_size) break :result dst_mcv;
+        const abi_size = dst_ty.abiSize(mod);
+        const bit_size = dst_ty.bitSize(mod);
+        if (abi_size * 8 <= bit_size or dst_ty.isVector(mod)) break :result dst_mcv;
-        const dst_limbs_len = math.divCeil(i32, bit_size, 64) catch unreachable;
-        const high_reg = if (dst_mcv.isRegister())
-            dst_mcv.getReg().?
+        const dst_limbs_len = math.divCeil(i32, @intCast(bit_size), 64) catch unreachable;
+        const high_mcv: MCValue = switch (dst_mcv) {
+            .register => |dst_reg| .{ .register = dst_reg },
+            .register_pair => |dst_regs| .{ .register = dst_regs[1] },
+            else => dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
+        };
+        const high_reg = if (high_mcv.isRegister())
+            high_mcv.getReg().?
         else
-            try self.copyToTmpRegister(
-                Type.usize,
-                dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
-            );
+            try self.copyToTmpRegister(Type.usize, high_mcv);
         const high_lock = self.register_manager.lockReg(high_reg);
         defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
-        const high_ty = try mod.intType(dst_signedness, bit_size % 64);
-
-        try self.truncateRegister(high_ty, high_reg);
-        if (!dst_mcv.isRegister()) try self.genCopy(
-            Type.usize,
-            dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
+        try self.truncateRegister(dst_ty, high_reg);
+        if (!high_mcv.isRegister()) try self.genCopy(
+            if (abi_size <= 8) dst_ty else Type.usize,
+            high_mcv,
             .{ .register = high_reg },
         );
         break :result dst_mcv;
@@ -13287,7 +13784,7 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
     const array_ty = ptr_ty.childType(mod);
     const array_len = array_ty.arrayLen(mod);
-    const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod));
+    const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, mod));
     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
     try self.genSetMem(
         .{ .frame = frame_index },
@@ -13497,7 +13994,7 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
     const ptr_mcv = try self.resolveInst(extra.ptr);
     const mem_size = Memory.Size.fromSize(val_abi_size);
     const ptr_mem: Memory = switch (ptr_mcv) {
-        .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(mem_size),
+        .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size),
         else => .{
            .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
            .mod = .{ .rm = .{ .size = mem_size } },
@@ -13563,7 +14060,7 @@ fn atomicOp(
     const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
     const mem_size = Memory.Size.fromSize(val_abi_size);
     const ptr_mem: Memory = switch (ptr_mcv) {
-        .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(mem_size),
+        .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size),
         else => .{
            .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
            .mod = .{ .rm = .{ .size = mem_size } },
@@ -13671,27 +14168,41 @@ fn atomicOp(
                 },
             };
-            try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
             const cmov_abi_size = @max(val_abi_size, 2);
             switch (val_mcv) {
-                .register => |val_reg| try self.asmCmovccRegisterRegister(
-                    cc,
-                    registerAlias(tmp_reg, cmov_abi_size),
-                    registerAlias(val_reg, cmov_abi_size),
-                ),
-                .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
-                    cc,
-                    registerAlias(tmp_reg, cmov_abi_size),
-                    val_mcv.mem(Memory.Size.fromSize(cmov_abi_size)),
-                ),
-                else => {
-                    const val_reg = try self.copyToTmpRegister(val_ty, val_mcv);
+                .register => |val_reg| {
+                    try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
                     try self.asmCmovccRegisterRegister(
                         cc,
                         registerAlias(tmp_reg, cmov_abi_size),
                         registerAlias(val_reg, cmov_abi_size),
                     );
                 },
+                .memory, .indirect, .load_frame => {
+                    try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
+                    try self.asmCmovccRegisterMemory(
+                        cc,
+                        registerAlias(tmp_reg, cmov_abi_size),
+                        try val_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)),
+                    );
+                },
+                else => {
+                    const mat_reg = try self.copyToTmpRegister(val_ty, val_mcv);
+                    const mat_lock = self.register_manager.lockRegAssumeUnused(mat_reg);
+                    defer self.register_manager.unlockReg(mat_lock);
+
+                    try self.genBinOpMir(
+                        .{ ._, .cmp },
+                        val_ty,
+                        tmp_mcv,
+                        .{ .register = mat_reg },
+                    );
+                    try self.asmCmovccRegisterRegister(
+                        cc,
+                        registerAlias(tmp_reg, cmov_abi_size),
+                        registerAlias(mat_reg, cmov_abi_size),
+                    );
+                },
             }
         },
     };
@@ -13728,8 +14239,8 @@ fn atomicOp(
            .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()),
         } },
     };
-    const val_lo_mem = val_mem_mcv.mem(.qword);
-    const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword);
+    const val_lo_mem = try val_mem_mcv.mem(self, .qword);
+    const val_hi_mem = try val_mem_mcv.address().offset(8).deref().mem(self, .qword);
     if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
         try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax);
         try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx);
@@ -14000,7 +14511,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
             try self.asmRegisterMemoryImmediate(
                 .{ .i_, .mul },
                 len_reg,
-                dst_ptr.address().offset(8).deref().mem(.qword),
+                try dst_ptr.address().offset(8).deref().mem(self, .qword),
                 Immediate.s(@intCast(dst_ptr_ty.childType(mod).abiSize(mod))),
             );
             break :len .{ .register = len_reg };
@@ -14171,28 +14682,162 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
     const mod = self.bin_file.options.module.?;
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
     const vector_ty = self.typeOfIndex(inst);
+    const vector_len = vector_ty.vectorLen(mod);
     const dst_rc = self.regClassForType(vector_ty);
-    const scalar_ty = vector_ty.scalarType(mod);
+    const scalar_ty = self.typeOf(ty_op.operand);
-    const src_mcv = try self.resolveInst(ty_op.operand);
     const result: MCValue = result: {
         switch (scalar_ty.zigTypeTag(mod)) {
            else => {},
+            .Bool => {
+                const regs =
+                    try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
+                const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs);
+                defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
+
+                try self.genSetReg(regs[1], vector_ty, .{ .immediate = 0 });
+                try self.genSetReg(
+                    regs[1],
+                    vector_ty,
+                    .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - vector_len) },
+                );
+                const src_mcv = try self.resolveInst(ty_op.operand);
+                const abi_size = @max(math.divCeil(u32, vector_len, 8) catch unreachable, 4);
+                try self.asmCmovccRegisterRegister(
+                    switch (src_mcv) {
+                        .eflags => |cc| cc,
+                        .register => |src_reg| cc: {
+                            try self.asmRegisterImmediate(
+                                .{ ._, .@"test" },
+                                src_reg.to8(),
+                                Immediate.u(1),
+                            );
+                            break :cc .nz;
+                        },
+                        else => cc: {
+                            try self.asmMemoryImmediate(
+                                .{ ._, .@"test" },
+                                try src_mcv.mem(self, .byte),
+                                Immediate.u(1),
+                            );
+                            break :cc .nz;
+                        },
+                    },
+                    registerAlias(regs[0], abi_size),
+                    registerAlias(regs[1], abi_size),
+                );
+                break :result .{ .register = regs[0] };
+            },
+            .Int => if (self.hasFeature(.avx2)) avx2: {
+                const mir_tag = @as(?Mir.Inst.FixedTag, switch (scalar_ty.intInfo(mod).bits) {
+                    else => null,
+                    1...8 => switch (vector_len) {
+                        else => null,
+                        1...32 => .{ .vp_b, .broadcast },
+                    },
+                    9...16 => switch (vector_len) {
+                        else => null,
+                        1...16 => .{ .vp_w, .broadcast },
+                    },
+                    17...32 => switch (vector_len) {
+                        else => null,
+                        1...8 => .{ .vp_d, .broadcast },
+                    },
+                    33...64 => switch (vector_len) {
+                        else => null,
+                        1...4 => .{ .vp_q, .broadcast },
+                    },
+                    65...128 => switch (vector_len) {
+                        else => null,
+                        1...2 => .{ .vp_i128, .broadcast },
+                    },
+                }) orelse break :avx2;
+
+                const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
+                const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+                defer self.register_manager.unlockReg(dst_lock);
+
+                const src_mcv = try self.resolveInst(ty_op.operand);
+                if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                    mir_tag,
+                    registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
+                    try src_mcv.mem(self, self.memSize(scalar_ty)),
+                ) else {
+                    if (mir_tag[0] == .vp_i128) break :avx2;
+                    try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+                    try self.asmRegisterRegister(
+                        mir_tag,
+                        registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
+                        registerAlias(dst_reg, @intCast(scalar_ty.abiSize(mod))),
+                    );
+                }
+                break :result .{ .register = dst_reg };
+            } else {
+                const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
+                const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+                defer self.register_manager.unlockReg(dst_lock);
+
+                try self.genSetReg(dst_reg, scalar_ty, .{ .air_ref = ty_op.operand });
+                if (vector_len == 1) break :result .{ .register = dst_reg };
+
+                const dst_alias = registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod)));
+                const scalar_bits = scalar_ty.intInfo(mod).bits;
+                if (switch (scalar_bits) {
+                    1...8 => true,
+                    9...128 => false,
+                    else => unreachable,
+                }) if (self.hasFeature(.avx)) try self.asmRegisterRegisterRegister(
+                    .{ .vp_, .unpcklbw },
+                    dst_alias,
+                    dst_alias,
+                    dst_alias,
+                ) else try self.asmRegisterRegister(
+                    .{ .p_, .unpcklbw },
+                    dst_alias,
+                    dst_alias,
+                );
+                if (switch (scalar_bits) {
+                    1...8 => vector_len > 2,
+                    9...16 => true,
+                    17...128 => false,
+                    else => unreachable,
+                }) try self.asmRegisterRegisterImmediate(
+                    .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl },
+                    dst_alias,
+                    dst_alias,
+                    Immediate.u(0),
+                );
+                if (switch (scalar_bits) {
+                    1...8 => vector_len > 4,
+                    9...16 => vector_len > 2,
+                    17...64 => true,
+                    65...128 => false,
+                    else => unreachable,
+                }) try self.asmRegisterRegisterImmediate(
+                    .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf },
+                    dst_alias,
+                    dst_alias,
+                    Immediate.u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00),
+                );
+                break :result .{ .register = dst_reg };
+            },
            .Float => switch (scalar_ty.floatBits(self.target.*)) {
-                32 => switch (vector_ty.vectorLen(mod)) {
+                32 => switch (vector_len) {
                    1 => {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                        break :result .{ .register = dst_reg };
                    },
                    2...4 => {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.hasFeature(.avx)) {
                            const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                            if (src_mcv.isMemory()) try self.asmRegisterMemory(
                                .{ .v_ss, .broadcast },
                                dst_reg.to128(),
-                                src_mcv.mem(.dword),
+                                try src_mcv.mem(self, .dword),
                            ) else {
                                const src_reg = if (src_mcv.isRegister())
                                    src_mcv.getReg().?
@@ -14224,11 +14869,12 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
                        }
                    },
                    5...8 => if (self.hasFeature(.avx)) {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
                            .{ .v_ss, .broadcast },
                            dst_reg.to256(),
-                            src_mcv.mem(.dword),
+                            try src_mcv.mem(self, .dword),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
@@ -14259,20 +14905,22 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
                    },
                    else => {},
                },
-                64 => switch (vector_ty.vectorLen(mod)) {
+                64 => switch (vector_len) {
                    1 => {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                        break :result .{ .register = dst_reg };
                    },
                    2 => {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (self.hasFeature(.sse3)) {
                            if (src_mcv.isMemory()) try self.asmRegisterMemory(
                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                dst_reg.to128(),
-                                src_mcv.mem(.qword),
+                                try src_mcv.mem(self, .qword),
                            ) else try self.asmRegisterRegister(
                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                dst_reg.to128(),
@@ -14292,11 +14940,12 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
                            );
                        },
                    3...4 => if (self.hasFeature(.avx)) {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
                            .{ .v_sd, .broadcast },
                            dst_reg.to256(),
-                            src_mcv.mem(.qword),
+                            try src_mcv.mem(self, .qword),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
@@ -14325,19 +14974,21 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
                    },
                    else => {},
                },
-                128 => switch (vector_ty.vectorLen(mod)) {
+                128 => switch (vector_len) {
                    1 => {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                        break :result .{ .register = dst_reg };
                    },
                    2 => if (self.hasFeature(.avx)) {
+                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
                            .{ .v_f128, .broadcast },
                            dst_reg.to256(),
-                            src_mcv.mem(.xword),
+                            try src_mcv.mem(self, .xword),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
@@ -14389,7 +15040,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
     try self.spillEflagsIfOccupied();
     const operand_mcv = try self.resolveInst(reduce.operand);
-    const mask_len = (std.math.cast(u6, operand_ty.vectorLen(mod)) orelse
+    const mask_len = (math.cast(u6, operand_ty.vectorLen(mod)) orelse
         return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}));
     const mask = (@as(u64, 1) << mask_len) - 1;
     const abi_size: u32 = @intCast(operand_ty.abiSize(mod));
@@ -14397,7 +15048,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
         .Or => {
             if (operand_mcv.isMemory()) try self.asmMemoryImmediate(
                 .{ ._, .@"test" },
-                operand_mcv.mem(Memory.Size.fromSize(abi_size)),
+                try operand_mcv.mem(self, Memory.Size.fromSize(abi_size)),
                 Immediate.u(mask),
             ) else {
                 const operand_reg = registerAlias(if (operand_mcv.isRegister())
@@ -14445,8 +15096,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
     const result: MCValue = result: {
         switch (result_ty.zigTypeTag(mod)) {
            .Struct => {
-                const frame_index =
-                    try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod));
+                const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
                if (result_ty.containerLayout(mod) == .Packed) {
                    const struct_type = mod.typeToStruct(result_ty).?;
                    try self.genInlineMemset(
@@ -14542,8 +15192,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
                break :result .{ .load_frame = .{ .index = frame_index } };
            },
            .Array => {
-                const frame_index =
-                    try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod));
+                const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
                const elem_ty = result_ty.childType(mod);
                const elem_size: u32 = @intCast(elem_ty.abiSize(mod));
@@ -14789,7 +15438,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
            mir_tag,
            mop1_reg,
            mop2_reg,
-            mops[2].mem(Memory.Size.fromSize(abi_size)),
+            try mops[2].mem(self, Memory.Size.fromSize(abi_size)),
        );
        break :result mops[0];
     };
@@ -14807,7 +15456,7 @@ fn airVaStart(self: *Self, inst: Air.Inst.Index) !void {
    )) {
        .SysV => result: {
            const info = self.va_info.sysv;
-            const dst_fi = try self.allocFrameIndex(FrameAlloc.initType(va_list_ty, mod));
+            const dst_fi = try self.allocFrameIndex(FrameAlloc.initSpill(va_list_ty, mod));
            var field_off: u31 = 0;
            // gp_offset: c_uint,
            try self.genSetMem(
@@ -15015,7 +15664,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void {
                .{ .v_ss, .cvtsd2 },
                dst_reg,
                dst_reg,
-                promote_mcv.mem(.qword),
+                try promote_mcv.mem(self, .qword),
            ) else try self.asmRegisterRegisterRegister(
                .{ .v_ss, .cvtsd2 },
                dst_reg,
@@ -15027,7 +15676,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void {
            ) else if (promote_mcv.isMemory()) try self.asmRegisterMemory(
                .{ ._ss, .cvtsd2 },
                dst_reg,
-                promote_mcv.mem(.qword),
+                try promote_mcv.mem(self, .qword),
            ) else try self.asmRegisterRegister(
                .{ ._ss, .cvtsd2 },
                dst_reg,
@@ -15473,6 +16122,33 @@ fn memSize(self: *Self, ty: Type) Memory.Size {
     };
 }

+fn splitType(self: *Self, ty: Type) ![2]Type {
+    const mod = self.bin_file.options.module.?;
+    const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
+    var parts: [2]Type = undefined;
+    if (classes.len == 2) for (&parts, classes, 0..) |*part, class, part_i| {
+        part.* = switch (class) {
+            .integer => switch (part_i) {
+                0 => Type.u64,
+                1 => part: {
+                    const elem_size = ty.abiAlignment(mod).minStrict(.@"8").toByteUnitsOptional().?;
+                    const elem_ty = try mod.intType(.unsigned, @intCast(elem_size * 8));
+                    break :part switch (@divExact(ty.abiSize(mod) - 8, elem_size)) {
+                        1 => elem_ty,
+                        else => |len| try mod.arrayType(.{ .len = len, .child = elem_ty.toIntern() }),
+                    };
+                },
+                else => unreachable,
+            },
+            .float => Type.f32,
+            .float_combine => try mod.vectorType(.{ .len = 2, .child = .f32_type }),
+            .sse => Type.f64,
+            else => break,
+        };
+    } else if (parts[0].abiSize(mod) + parts[1].abiSize(mod) == ty.abiSize(mod)) return parts;
+    return self.fail("TODO implement splitType for {}", .{ty.fmt(mod)});
+}
+
 /// Truncates the value in the register in place.
 /// Clobbers any remaining bits.
 fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig
index 52ef0511c1..54f36af507 100644
--- a/src/arch/x86_64/Encoding.zig
+++ b/src/arch/x86_64/Encoding.zig
@@ -410,6 +410,8 @@ pub const Mnemonic = enum {
     vfmadd132ps, vfmadd213ps, vfmadd231ps,
     vfmadd132sd, vfmadd213sd, vfmadd231sd,
     vfmadd132ss, vfmadd213ss, vfmadd231ss,
+    // AVX2
+    vpbroadcastb, vpbroadcastd, vpbroadcasti128, vpbroadcastq, vpbroadcastw,
     // zig fmt: on
 };
@@ -444,7 +446,7 @@ pub const Op = enum {
     moffs, sreg,
     st, mm, mm_m64,
-    xmm0, xmm, xmm_m32, xmm_m64, xmm_m128,
+    xmm0, xmm, xmm_m8, xmm_m16, xmm_m32, xmm_m64, xmm_m128,
     ymm, ymm_m256,
     // zig fmt: on
@@ -534,7 +536,7 @@ pub const Op = enum {
            .eax, .r32, .rm32, .r32_m16 => unreachable,
            .rax, .r64, .rm64, .r64_m16 => unreachable,
            .st, .mm, .mm_m64 => unreachable,
-            .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
+            .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
            .ymm, .ymm_m256 => unreachable,
            .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
            .unity => 1,
@@ -556,7 +558,7 @@ pub const Op = enum {
            .eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
            .rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
            .st => 80,
-            .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
+            .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
            .ymm, .ymm_m256 => 256,
        };
     }
@@ -568,8 +570,8 @@ pub const Op = enum {
            .rel8, .rel16, .rel32 => unreachable,
            .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64 => unreachable,
            .st, .mm, .xmm0, .xmm, .ymm => unreachable,
-            .m8, .rm8, .r32_m8 => 8,
-            .m16, .rm16, .r32_m16, .r64_m16 => 16,
+            .m8, .rm8, .r32_m8, .xmm_m8 => 8,
+            .m16, .rm16, .r32_m16, .r64_m16, .xmm_m16 => 16,
            .m32, .rm32, .xmm_m32 => 32,
            .m64, .rm64, .mm_m64, .xmm_m64 => 64,
            .m80 => 80,
@@ -600,7 +602,7 @@ pub const Op = enum {
            .rm8, .rm16, .rm32, .rm64,
            .r32_m8, .r32_m16, .r64_m16,
            .st, .mm, .mm_m64,
-            .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
+            .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
            .ymm, .ymm_m256,
            => true,
            else => false,
@@ -629,7 +631,7 @@ pub const Op = enum {
            .m8, .m16, .m32, .m64, .m80, .m128, .m256,
            .m,
            .mm_m64,
-            .xmm_m32, .xmm_m64, .xmm_m128,
+            .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
            .ymm_m256,
            => true,
            else => false,
@@ -654,7 +656,7 @@ pub const Op = enum {
            .sreg => .segment,
            .st => .x87,
            .mm, .mm_m64 => .mmx,
-            .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
+            .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
            .ymm, .ymm_m256 => .sse,
        };
     }
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index a5cdfe42b2..2e8970eee7 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -255,6 +255,8 @@ pub const Inst = struct {
         vp_q,
         /// VEX-Encoded Packed ___ Double Quadword
         vp_dq,
+        /// VEX-Encoded Packed ___ Integer Data
+        vp_i128,
         /// VEX-Encoded ___ Scalar Single-Precision Values
         v_ss,
         /// VEX-Encoded ___ Packed Single-Precision Values
diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig
index c8595cd004..579934da8e 100644
--- a/src/arch/x86_64/bits.zig
+++ b/src/arch/x86_64/bits.zig
@@ -237,7 +237,7 @@ pub const Register = enum(u7) {
         return @intCast(@intFromEnum(reg) - base);
     }
-    pub fn bitSize(reg: Register) u64 {
+    pub fn bitSize(reg: Register) u10 {
         return switch (@intFromEnum(reg)) {
             // zig fmt: off
             @intFromEnum(Register.rax) ... @intFromEnum(Register.r15) => 64,
diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig
index e160d0ced5..76003f89a6 100644
--- a/src/arch/x86_64/encodings.zig
+++ b/src/arch/x86_64/encodings.zig
@@ -1742,6 +1742,16 @@ pub const table = [_]Entry{
     .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+    .{ .vpbroadcastb, .rm, &.{ .xmm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_128_w0, .avx2 },
+    .{ .vpbroadcastb, .rm, &.{ .ymm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_256_w0, .avx2 },
+    .{ .vpbroadcastw, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x79 }, 0, .vex_128_w0, .avx2 },
+    .{ .vpbroadcastw, .rm, &.{ .ymm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x79 }, 0, .vex_256_w0, .avx2 },
+    .{ .vpbroadcastd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_128_w0, .avx2 },
+    .{ .vpbroadcastd, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_256_w0, .avx2 },
+    .{ .vpbroadcastq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_128_w0, .avx2 },
+    .{ .vpbroadcastq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_256_w0, .avx2 },
+    .{ .vpbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
+
     .{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 },
     .{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 },
     .{ .vpcmpeqd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x76 }, 0, .vex_256_wig, .avx2 },
diff --git a/src/codegen.zig b/src/codegen.zig
index bb2a9f9324..d347aff7bc 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -376,7 +376,10 @@ pub fn generateSymbol(
                    .val = switch (aggregate.storage) {
                        .bytes => unreachable,
                        .elems => |elems| elems[@as(usize, @intCast(index))],
-                        .repeated_elem => |elem| elem,
+                        .repeated_elem => |elem| if (index < array_type.len)
+                            elem
+                        else
+                            array_type.sentinel,
                    }.toValue(),
                }, code, debug_output, reloc_info)) {
                    .ok => {},
diff --git a/src/link/Coff.zig b/src/link/Coff.zig
index fd2415bff5..075fb60861 100644
--- a/src/link/Coff.zig
+++ b/src/link/Coff.zig
@@ -388,6 +388,7 @@ fn populateMissingMetadata(self: *Coff) !void {
         self.rdata_section_index = try self.allocateSection(".rdata", file_size, .{
            .CNT_INITIALIZED_DATA = 1,
            .MEM_READ = 1,
+            .MEM_WRITE = 1,
        });
     }
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
index 8bcf219c4d..a1b9a4f66e 100644
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@@ -1260,7 +1260,6 @@ test "zero multiplicand" {
 test "@intCast to u0" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO