diff --git a/lib/std/target.zig b/lib/std/target.zig index 1f31d602ca..27260255e3 100644 --- a/lib/std/target.zig +++ b/lib/std/target.zig @@ -566,7 +566,7 @@ pub const Target = union(enum) { pub const Set = struct { ints: [usize_count]usize, - pub const needed_bit_count = 174; + pub const needed_bit_count = 175; pub const byte_count = (needed_bit_count + 7) / 8; pub const usize_count = (byte_count + (@sizeOf(usize) - 1)) / @sizeOf(usize); pub const Index = std.math.Log2Int(@IntType(false, usize_count * @bitSizeOf(usize))); diff --git a/lib/std/target/aarch64.zig b/lib/std/target/aarch64.zig index d2878e2423..5524c8fe22 100644 --- a/lib/std/target/aarch64.zig +++ b/lib/std/target/aarch64.zig @@ -6,6 +6,7 @@ pub const Feature = enum { a53, a55, a57, + a65, a72, a73, a75, @@ -15,6 +16,11 @@ pub const Feature = enum { alternate_sextload_cvt_f32_pattern, altnzcv, am, + apple_a10, + apple_a11, + apple_a12, + apple_a13, + apple_a7, arith_bcc_fusion, arith_cbz_fusion, balance_fp_ops, @@ -35,10 +41,10 @@ pub const Feature = enum { crc, crypto, custom_cheap_as_move, - cyclone, disable_latency_sched_heuristic, dit, dotprod, + ete, exynos_cheap_as_move, exynosm1, exynosm2, @@ -65,12 +71,15 @@ pub const Feature = enum { mpam, mte, neon, + neoversee1, + neoversen1, no_neg_immediates, nv, pa, pan, pan_rwv, perfmon, + pmu, predictable_select_expensive, predres, rand, @@ -122,16 +131,19 @@ pub const Feature = enum { sve2_bitperm, sve2_sha3, sve2_sm4, + tagged_globals, thunderx, thunderx2t99, thunderxt81, thunderxt83, thunderxt88, tlb_rmi, + tme, tpidr_el1, tpidr_el2, tpidr_el3, tracev8_4, + trbe, tsv110, uaops, use_aa, @@ -217,6 +229,21 @@ pub const all_features = blk: { .use_postra_scheduler, }), }; + result[@enumToInt(Feature.a65)] = .{ + .llvm_name = "a65", + .description = "Cortex-A65 ARM processors", + .dependencies = featureSet(&[_]Feature{ + .crypto, + .dotprod, + .fp_armv8, + .fullfp16, + .neon, + .ras, + .rcpc, + .ssbs, + .v8_2a, + }), + }; result[@enumToInt(Feature.a72)] = .{ .llvm_name = "a72", .description = "Cortex-A72 ARM processors", @@ -297,6 +324,110 @@ pub const all_features = blk: { .description = "Enable v8.4-A Activity Monitors extension", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.apple_a10)] = .{ + .llvm_name = "apple-a10", + .description = "Apple A10", + .dependencies = featureSet(&[_]Feature{ + .alternate_sextload_cvt_f32_pattern, + .arith_bcc_fusion, + .arith_cbz_fusion, + .crc, + .crypto, + .disable_latency_sched_heuristic, + .fp_armv8, + .fuse_aes, + .fuse_crypto_eor, + .lor, + .neon, + .pan, + .perfmon, + .rdm, + .vh, + .zcm, + .zcz, + }), + }; + result[@enumToInt(Feature.apple_a11)] = .{ + .llvm_name = "apple-a11", + .description = "Apple A11", + .dependencies = featureSet(&[_]Feature{ + .alternate_sextload_cvt_f32_pattern, + .arith_bcc_fusion, + .arith_cbz_fusion, + .crypto, + .disable_latency_sched_heuristic, + .fp_armv8, + .fullfp16, + .fuse_aes, + .fuse_crypto_eor, + .neon, + .perfmon, + .v8_2a, + .zcm, + .zcz, + }), + }; + result[@enumToInt(Feature.apple_a12)] = .{ + .llvm_name = "apple-a12", + .description = "Apple A12", + .dependencies = featureSet(&[_]Feature{ + .alternate_sextload_cvt_f32_pattern, + .arith_bcc_fusion, + .arith_cbz_fusion, + .crypto, + .disable_latency_sched_heuristic, + .fp_armv8, + .fullfp16, + .fuse_aes, + .fuse_crypto_eor, + .neon, + .perfmon, + .v8_3a, + .zcm, + .zcz, + }), + }; + result[@enumToInt(Feature.apple_a13)] = .{ + .llvm_name = "apple-a13", + .description = "Apple A13", + .dependencies = featureSet(&[_]Feature{ + .alternate_sextload_cvt_f32_pattern, + .arith_bcc_fusion, + .arith_cbz_fusion, + .crypto, + .disable_latency_sched_heuristic, + .fp_armv8, + .fp16fml, + .fullfp16, + .fuse_aes, + .fuse_crypto_eor, + .neon, + .perfmon, + .sha3, + .v8_4a, + .zcm, + .zcz, + }), + }; + result[@enumToInt(Feature.apple_a7)] = .{ + .llvm_name = "apple-a7", + .description = "Apple A7 (the CPU formerly known as Cyclone)", + .dependencies = featureSet(&[_]Feature{ + .alternate_sextload_cvt_f32_pattern, + .arith_bcc_fusion, + .arith_cbz_fusion, + .crypto, + .disable_latency_sched_heuristic, + .fp_armv8, + .fuse_aes, + .fuse_crypto_eor, + .neon, + .perfmon, + .zcm, + .zcz, + .zcz_fp_workaround, + }), + }; result[@enumToInt(Feature.arith_bcc_fusion)] = .{ .llvm_name = "arith-bcc-fusion", .description = "CPU fuses arithmetic+bcc operations", @@ -403,25 +534,6 @@ pub const all_features = blk: { .description = "Use custom handling of cheap instructions", .dependencies = featureSet(&[_]Feature{}), }; - result[@enumToInt(Feature.cyclone)] = .{ - .llvm_name = "cyclone", - .description = "Cyclone", - .dependencies = featureSet(&[_]Feature{ - .alternate_sextload_cvt_f32_pattern, - .arith_bcc_fusion, - .arith_cbz_fusion, - .crypto, - .disable_latency_sched_heuristic, - .fp_armv8, - .fuse_aes, - .fuse_crypto_eor, - .neon, - .perfmon, - .zcm, - .zcz, - .zcz_fp_workaround, - }), - }; result[@enumToInt(Feature.disable_latency_sched_heuristic)] = .{ .llvm_name = "disable-latency-sched-heuristic", .description = "Disable latency scheduling heuristic", @@ -437,6 +549,13 @@ pub const all_features = blk: { .description = "Enable dot product support", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.ete)] = .{ + .llvm_name = "ete", + .description = "Enable Embedded Trace Extension", + .dependencies = featureSet(&[_]Feature{ + .trbe, + }), + }; result[@enumToInt(Feature.exynos_cheap_as_move)] = .{ .llvm_name = "exynos-cheap-as-move", .description = "Use Exynos specific handling of cheap instructions", @@ -656,6 +775,35 @@ pub const all_features = blk: { .fp_armv8, }), }; + result[@enumToInt(Feature.neoversee1)] = .{ + .llvm_name = "neoversee1", + .description = "Neoverse E1 ARM processors", + .dependencies = featureSet(&[_]Feature{ + .crypto, + .dotprod, + .fp_armv8, + .fullfp16, + .neon, + .rcpc, + .ssbs, + .v8_2a, + }), + }; + result[@enumToInt(Feature.neoversen1)] = .{ + .llvm_name = "neoversen1", + .description = "Neoverse N1 ARM processors", + .dependencies = featureSet(&[_]Feature{ + .crypto, + .dotprod, + .fp_armv8, + .fullfp16, + .neon, + .rcpc, + .spe, + .ssbs, + .v8_2a, + }), + }; result[@enumToInt(Feature.no_neg_immediates)] = .{ .llvm_name = "no-neg-immediates", .description = "Convert immediates and instructions to their negated or complemented equivalent when the immediate does not fit in the encoding.", @@ -688,6 +836,11 @@ pub const all_features = blk: { .description = "Enable ARMv8 PMUv3 Performance Monitors extension", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.pmu)] = .{ + .llvm_name = "pmu", + .description = "Enable v8.4-A PMU extension", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.predictable_select_expensive)] = .{ .llvm_name = "predictable-select-expensive", .description = "Prefer likely predicted branches over selects", @@ -979,6 +1132,11 @@ pub const all_features = blk: { .sve2, }), }; + result[@enumToInt(Feature.tagged_globals)] = .{ + .llvm_name = "tagged-globals", + .description = "Use an instruction sequence for taking the address of a global that allows a memory tag in the upper address bits", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.thunderx)] = .{ .llvm_name = "thunderx", .description = "Cavium ThunderX processors", @@ -1052,6 +1210,11 @@ pub const all_features = blk: { .description = "Enable v8.4-A TLB Range and Maintenance Instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.tme)] = .{ + .llvm_name = "tme", + .description = "Enable Transactional Memory Extension", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.tpidr_el1)] = .{ .llvm_name = "tpidr-el1", .description = "Permit use of TPIDR_EL1 for the TLS base", @@ -1072,6 +1235,11 @@ pub const all_features = blk: { .description = "Enable v8.4-A Trace extension", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.trbe)] = .{ + .llvm_name = "trbe", + .description = "Enable Trace Buffer Extension", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.tsv110)] = .{ .llvm_name = "tsv110", .description = "HiSilicon TS-V110 processors", @@ -1164,6 +1332,7 @@ pub const all_features = blk: { .fmi, .mpam, .nv, + .pmu, .rasv8_4, .rcpc_immo, .sel2, @@ -1229,11 +1398,74 @@ pub const all_features = blk: { }; pub const cpu = struct { + pub const apple_a10 = Cpu{ + .name = "apple_a10", + .llvm_name = "apple-a10", + .features = featureSet(&[_]Feature{ + .apple_a10, + }), + }; + pub const apple_a11 = Cpu{ + .name = "apple_a11", + .llvm_name = "apple-a11", + .features = featureSet(&[_]Feature{ + .apple_a11, + }), + }; + pub const apple_a12 = Cpu{ + .name = "apple_a12", + .llvm_name = "apple-a12", + .features = featureSet(&[_]Feature{ + .apple_a12, + }), + }; + pub const apple_a13 = Cpu{ + .name = "apple_a13", + .llvm_name = "apple-a13", + .features = featureSet(&[_]Feature{ + .apple_a13, + }), + }; + pub const apple_a7 = Cpu{ + .name = "apple_a7", + .llvm_name = "apple-a7", + .features = featureSet(&[_]Feature{ + .apple_a7, + }), + }; + pub const apple_a8 = Cpu{ + .name = "apple_a8", + .llvm_name = "apple-a8", + .features = featureSet(&[_]Feature{ + .apple_a7, + }), + }; + pub const apple_a9 = Cpu{ + .name = "apple_a9", + .llvm_name = "apple-a9", + .features = featureSet(&[_]Feature{ + .apple_a7, + }), + }; pub const apple_latest = Cpu{ .name = "apple_latest", .llvm_name = "apple-latest", .features = featureSet(&[_]Feature{ - .cyclone, + .apple_a13, + }), + }; + pub const apple_s4 = Cpu{ + .name = "apple_s4", + .llvm_name = "apple-s4", + .features = featureSet(&[_]Feature{ + .apple_a12, + }), + }; + pub const apple_s5 = Cpu{ + .name = "apple_s5", + .llvm_name = "apple-s5", + .features = featureSet(&[_]Feature{ + .apple_a12, }), }; pub const cortex_a35 = Cpu{ @@ -1264,6 +1496,20 @@ pub const cpu = struct { .a57, }), }; + pub const cortex_a65 = Cpu{ + .name = "cortex_a65", + .llvm_name = "cortex-a65", + .features = featureSet(&[_]Feature{ + .a65, + }), + }; + pub const cortex_a65ae = Cpu{ + .name = "cortex_a65ae", + .llvm_name = "cortex-a65ae", + .features = featureSet(&[_]Feature{ + .a65, + }), + }; pub const cortex_a72 = Cpu{ .name = "cortex_a72", .llvm_name = "cortex-a72", @@ -1303,19 +1549,19 @@ pub const cpu = struct { .name = "cyclone", .llvm_name = "cyclone", .features = featureSet(&[_]Feature{ - .cyclone, + .apple_a7, }), }; pub const exynos_m1 = Cpu{ .name = "exynos_m1", - .llvm_name = "exynos-m1", + .llvm_name = null, .features = featureSet(&[_]Feature{ .exynosm1, }), }; pub const exynos_m2 = Cpu{ .name = "exynos_m2", - .llvm_name = "exynos-m2", + .llvm_name = null, .features = featureSet(&[_]Feature{ .exynosm2, }), @@ -1352,6 +1598,7 @@ pub const cpu = struct { .name = "generic", .llvm_name = "generic", .features = featureSet(&[_]Feature{ + .ete, .fp_armv8, .fuse_aes, .neon, @@ -1366,6 +1613,20 @@ pub const cpu = struct { .kryo, }), }; + pub const neoverse_e1 = Cpu{ + .name = "neoverse_e1", + .llvm_name = "neoverse-e1", + .features = featureSet(&[_]Feature{ + .neoversee1, + }), + }; + pub const neoverse_n1 = Cpu{ + .name = "neoverse_n1", + .llvm_name = "neoverse-n1", + .features = featureSet(&[_]Feature{ + .neoversen1, + }), + }; pub const saphira = Cpu{ .name = "saphira", .llvm_name = "saphira", @@ -1421,11 +1682,22 @@ pub const cpu = struct { /// TODO: Replace this with usage of `std.meta.declList`. It does work, but stage1 /// compiler has inefficient memory and CPU usage, affecting build times. pub const all_cpus = &[_]*const Cpu{ + &cpu.apple_a10, + &cpu.apple_a11, + &cpu.apple_a12, + &cpu.apple_a13, + &cpu.apple_a7, + &cpu.apple_a8, + &cpu.apple_a9, &cpu.apple_latest, + &cpu.apple_s4, + &cpu.apple_s5, &cpu.cortex_a35, &cpu.cortex_a53, &cpu.cortex_a55, &cpu.cortex_a57, + &cpu.cortex_a65, + &cpu.cortex_a65ae, &cpu.cortex_a72, &cpu.cortex_a73, &cpu.cortex_a75, @@ -1440,6 +1712,8 @@ pub const all_cpus = &[_]*const Cpu{ &cpu.falkor, &cpu.generic, &cpu.kryo, + &cpu.neoverse_e1, + &cpu.neoverse_n1, &cpu.saphira, &cpu.thunderx, &cpu.thunderx2t99, diff --git a/lib/std/target/amdgpu.zig b/lib/std/target/amdgpu.zig index 182b9fa453..a8da479303 100644 --- a/lib/std/target/amdgpu.zig +++ b/lib/std/target/amdgpu.zig @@ -62,6 +62,7 @@ pub const Feature = enum { max_private_element_size_16, max_private_element_size_4, max_private_element_size_8, + mfma_inline_literal_bug, mimg_r128, movrel, no_data_dep_hazard, @@ -490,6 +491,11 @@ pub const all_features = blk: { .description = "Maximum private access size may be 8", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.mfma_inline_literal_bug)] = .{ + .llvm_name = "mfma-inline-literal-bug", + .description = "MFMA cannot use inline literal as SrcC", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.mimg_r128)] = .{ .llvm_name = "mimg-r128", .description = "Support 128-bit texture resources", @@ -1102,6 +1108,7 @@ pub const cpu = struct { .half_rate_64_ops, .ldsbankcount32, .mai_insts, + .mfma_inline_literal_bug, .pk_fmac_f16_inst, .sram_ecc, }), diff --git a/lib/std/target/arm.zig b/lib/std/target/arm.zig index 62a4e1e835..fd5007de8a 100644 --- a/lib/std/target/arm.zig +++ b/lib/std/target/arm.zig @@ -100,6 +100,9 @@ pub const Feature = enum { muxed_units, mve, mve_fp, + mve1beat, + mve2beat, + mve4beat, nacl_trap, neon, neon_fpmovs, @@ -129,6 +132,7 @@ pub const Feature = enum { slow_odd_reg, slow_vdup32, slow_vgetlni32, + slowfpvfmx, slowfpvmlx, soft_float, splat_vfp_neon, @@ -137,7 +141,6 @@ pub const Feature = enum { thumb_mode, thumb2, trustzone, - use_aa, use_misched, v4t, v5t, @@ -158,8 +161,6 @@ pub const Feature = enum { v8m, v8m_main, vfp2, - vfp2d16, - vfp2d16sp, vfp2sp, vfp3, vfp3d16, @@ -746,9 +747,9 @@ pub const all_features = blk: { .slow_fp_brcc, .slow_vdup32, .slow_vgetlni32, + .slowfpvfmx, .slowfpvmlx, .splat_vfp_neon, - .use_aa, .wide_stride_vfp, .zcz, }), @@ -936,6 +937,21 @@ pub const all_features = blk: { .mve, }), }; + result[@enumToInt(Feature.mve1beat)] = .{ + .llvm_name = "mve1beat", + .description = "Model MVE instructions as a 1 beat per tick architecture", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.mve2beat)] = .{ + .llvm_name = "mve2beat", + .description = "Model MVE instructions as a 2 beats per tick architecture", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.mve4beat)] = .{ + .llvm_name = "mve4beat", + .description = "Model MVE instructions as a 4 beats per tick architecture", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.nacl_trap)] = .{ .llvm_name = "nacl-trap", .description = "NaCl trap", @@ -1085,6 +1101,11 @@ pub const all_features = blk: { .description = "Has slow VGETLNi32 - prefer VMOV", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.slowfpvfmx)] = .{ + .llvm_name = "slowfpvfmx", + .description = "Disable VFP / NEON FMA instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.slowfpvmlx)] = .{ .llvm_name = "slowfpvmlx", .description = "Disable VFP / NEON MAC instructions", @@ -1127,11 +1148,6 @@ pub const all_features = blk: { .description = "Enable support for TrustZone security extensions", .dependencies = featureSet(&[_]Feature{}), }; - result[@enumToInt(Feature.use_aa)] = .{ - .llvm_name = "use-aa", - .description = "Use alias analysis during codegen", - .dependencies = featureSet(&[_]Feature{}), - }; result[@enumToInt(Feature.use_misched)] = .{ .llvm_name = "use-misched", .description = "Use the MachineScheduler", @@ -1269,31 +1285,16 @@ pub const all_features = blk: { result[@enumToInt(Feature.vfp2)] = .{ .llvm_name = "vfp2", .description = "Enable VFP2 instructions", - .dependencies = featureSet(&[_]Feature{ - .vfp2d16, - .vfp2sp, - }), - }; - result[@enumToInt(Feature.vfp2d16)] = .{ - .llvm_name = "vfp2d16", - .description = "Enable VFP2 instructions", .dependencies = featureSet(&[_]Feature{ .fp64, - .vfp2d16sp, - }), - }; - result[@enumToInt(Feature.vfp2d16sp)] = .{ - .llvm_name = "vfp2d16sp", - .description = "Enable VFP2 instructions with no double precision", - .dependencies = featureSet(&[_]Feature{ - .fpregs, + .vfp2sp, }), }; result[@enumToInt(Feature.vfp2sp)] = .{ .llvm_name = "vfp2sp", .description = "Enable VFP2 instructions with no double precision", .dependencies = featureSet(&[_]Feature{ - .vfp2d16sp, + .fpregs, }), }; result[@enumToInt(Feature.vfp3)] = .{ @@ -1704,6 +1705,7 @@ pub const cpu = struct { .mp, .ret_addr_stack, .slow_fp_brcc, + .slowfpvfmx, .slowfpvmlx, .trustzone, .vfp4, @@ -1758,6 +1760,7 @@ pub const cpu = struct { .mp, .ret_addr_stack, .slow_fp_brcc, + .slowfpvfmx, .slowfpvmlx, .trustzone, .vfp4, @@ -1838,6 +1841,7 @@ pub const cpu = struct { .nonpipelined_vfp, .ret_addr_stack, .slow_fp_brcc, + .slowfpvfmx, .slowfpvmlx, .trustzone, .vmlx_forwarding, @@ -1901,7 +1905,6 @@ pub const cpu = struct { .loop_align, .m3, .no_branch_predictor, - .use_aa, .use_misched, }), }; @@ -1914,8 +1917,8 @@ pub const cpu = struct { .fp_armv8d16sp, .loop_align, .no_branch_predictor, + .slowfpvfmx, .slowfpvmlx, - .use_aa, .use_misched, }), }; @@ -1928,8 +1931,8 @@ pub const cpu = struct { .fp_armv8d16sp, .loop_align, .no_branch_predictor, + .slowfpvfmx, .slowfpvmlx, - .use_aa, .use_misched, }), }; @@ -1940,8 +1943,8 @@ pub const cpu = struct { .armv7e_m, .loop_align, .no_branch_predictor, + .slowfpvfmx, .slowfpvmlx, - .use_aa, .use_misched, .vfp4d16sp, }), @@ -1973,6 +1976,7 @@ pub const cpu = struct { .r4, .ret_addr_stack, .slow_fp_brcc, + .slowfpvfmx, .slowfpvmlx, .vfp3d16, }), @@ -1987,6 +1991,7 @@ pub const cpu = struct { .r5, .ret_addr_stack, .slow_fp_brcc, + .slowfpvfmx, .slowfpvmlx, .vfp3d16, }), @@ -1998,7 +2003,6 @@ pub const cpu = struct { .armv8_r, .fpao, .r52, - .use_aa, .use_misched, }), }; @@ -2014,6 +2018,7 @@ pub const cpu = struct { .r7, .ret_addr_stack, .slow_fp_brcc, + .slowfpvfmx, .slowfpvmlx, .vfp3d16, }), @@ -2029,6 +2034,7 @@ pub const cpu = struct { .mp, .ret_addr_stack, .slow_fp_brcc, + .slowfpvfmx, .slowfpvmlx, .vfp3d16, }), @@ -2047,6 +2053,7 @@ pub const cpu = struct { .mp, .neonfp, .ret_addr_stack, + .slowfpvfmx, .slowfpvmlx, .swift, .use_misched, @@ -2063,7 +2070,7 @@ pub const cpu = struct { }; pub const exynos_m1 = Cpu{ .name = "exynos_m1", - .llvm_name = "exynos-m1", + .llvm_name = null, .features = featureSet(&[_]Feature{ .armv8_a, .exynos, @@ -2071,7 +2078,7 @@ pub const cpu = struct { }; pub const exynos_m2 = Cpu{ .name = "exynos_m2", - .llvm_name = "exynos-m2", + .llvm_name = null, .features = featureSet(&[_]Feature{ .armv8_a, .exynos, @@ -2162,6 +2169,18 @@ pub const cpu = struct { .armv6k, }), }; + pub const neoverse_n1 = Cpu{ + .name = "neoverse_n1", + .llvm_name = "neoverse-n1", + .features = featureSet(&[_]Feature{ + .armv8_2_a, + .crc, + .crypto, + .dotprod, + .hwdiv, + .hwdiv_arm, + }), + }; pub const sc000 = Cpu{ .name = "sc000", .llvm_name = "sc000", @@ -2176,7 +2195,6 @@ pub const cpu = struct { .armv7_m, .m3, .no_branch_predictor, - .use_aa, .use_misched, }), }; @@ -2227,6 +2245,7 @@ pub const cpu = struct { .slow_odd_reg, .slow_vdup32, .slow_vgetlni32, + .slowfpvfmx, .slowfpvmlx, .swift, .use_misched, @@ -2322,6 +2341,7 @@ pub const all_cpus = &[_]*const Cpu{ &cpu.kryo, &cpu.mpcore, &cpu.mpcorenovfp, + &cpu.neoverse_n1, &cpu.sc000, &cpu.sc300, &cpu.strongarm, diff --git a/lib/std/target/mips.zig b/lib/std/target/mips.zig index fce7c9ce36..0cc9ab738b 100644 --- a/lib/std/target/mips.zig +++ b/lib/std/target/mips.zig @@ -4,6 +4,7 @@ const Cpu = std.Target.Cpu; pub const Feature = enum { abs2008, cnmips, + cnmipsp, crc, dsp, dspr2, @@ -51,6 +52,7 @@ pub const Feature = enum { use_tcc_in_div, vfpu, virt, + xgot, }; pub usingnamespace Cpu.Feature.feature_set_fns(Feature); @@ -71,6 +73,13 @@ pub const all_features = blk: { .mips64r2, }), }; + result[@enumToInt(Feature.cnmipsp)] = .{ + .llvm_name = "cnmipsp", + .description = "Octeon+ cnMIPS Support", + .dependencies = featureSet(&[_]Feature{ + .cnmips, + }), + }; result[@enumToInt(Feature.crc)] = .{ .llvm_name = "crc", .description = "Mips R6 CRC ASE", @@ -363,6 +372,11 @@ pub const all_features = blk: { .description = "Mips Virtualization ASE", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.xgot)] = .{ + .llvm_name = "xgot", + .description = "Assume 32-bit GOT", + .dependencies = featureSet(&[_]Feature{}), + }; const ti = @typeInfo(Feature); for (result) |*elem, i| { elem.index = i; @@ -372,6 +386,13 @@ pub const all_features = blk: { }; pub const cpu = struct { + pub const generic = Cpu{ + .name = "generic", + .llvm_name = "generic", + .features = featureSet(&[_]Feature{ + .mips32, + }), + }; pub const mips1 = Cpu{ .name = "mips1", .llvm_name = "mips1", @@ -485,6 +506,15 @@ pub const cpu = struct { .mips64r2, }), }; + pub const @"octeon+" = Cpu{ + .name = "octeon+", + .llvm_name = "octeon+", + .features = featureSet(&[_]Feature{ + .cnmips, + .cnmipsp, + .mips64r2, + }), + }; pub const p5600 = Cpu{ .name = "p5600", .llvm_name = "p5600", @@ -498,6 +528,7 @@ pub const cpu = struct { /// TODO: Replace this with usage of `std.meta.declList`. It does work, but stage1 /// compiler has inefficient memory and CPU usage, affecting build times. pub const all_cpus = &[_]*const Cpu{ + &cpu.generic, &cpu.mips1, &cpu.mips2, &cpu.mips3, @@ -514,5 +545,6 @@ pub const all_cpus = &[_]*const Cpu{ &cpu.mips64r5, &cpu.mips64r6, &cpu.octeon, + &cpu.@"octeon+", &cpu.p5600, }; diff --git a/lib/std/target/powerpc.zig b/lib/std/target/powerpc.zig index 41321f7b04..7681caba81 100644 --- a/lib/std/target/powerpc.zig +++ b/lib/std/target/powerpc.zig @@ -4,6 +4,7 @@ const Cpu = std.Target.Cpu; pub const Feature = enum { @"64bit", @"64bitregs", + allow_unaligned_fp_access, altivec, booke, bpermd, @@ -71,6 +72,11 @@ pub const all_features = blk: { .description = "Enable 64-bit registers usage for ppc32 [beta]", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.allow_unaligned_fp_access)] = .{ + .llvm_name = "allow-unaligned-fp-access", + .description = "CPU does not trap on unaligned FP access", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.altivec)] = .{ .llvm_name = "altivec", .description = "Enable Altivec instructions", @@ -560,6 +566,7 @@ pub const cpu = struct { .booke, .icbt, .isel, + .spe, }), }; pub const e500mc = Cpu{ @@ -584,6 +591,45 @@ pub const cpu = struct { .stfiwx, }), }; + pub const future = Cpu{ + .name = "future", + .llvm_name = "future", + .features = featureSet(&[_]Feature{ + .@"64bit", + .allow_unaligned_fp_access, + .altivec, + .bpermd, + .cmpb, + .crypto, + .direct_move, + .extdiv, + .fcpsgn, + .fpcvt, + .fprnd, + .fre, + .fres, + .frsqrte, + .frsqrtes, + .fsqrt, + .htm, + .icbt, + .isa_v30_instructions, + .isel, + .ldbrx, + .lfiwax, + .mfocrf, + .partword_atomics, + .popcntd, + .power8_altivec, + .power8_vector, + .power9_altivec, + .power9_vector, + .recipprec, + .stfiwx, + .two_const_nr, + .vsx, + }), + }; pub const g3 = Cpu{ .name = "g3", .llvm_name = "g3", @@ -662,6 +708,7 @@ pub const cpu = struct { .llvm_name = "ppc64le", .features = featureSet(&[_]Feature{ .@"64bit", + .allow_unaligned_fp_access, .altivec, .bpermd, .cmpb, @@ -793,6 +840,7 @@ pub const cpu = struct { .llvm_name = "pwr7", .features = featureSet(&[_]Feature{ .@"64bit", + .allow_unaligned_fp_access, .altivec, .bpermd, .cmpb, @@ -821,6 +869,7 @@ pub const cpu = struct { .llvm_name = "pwr8", .features = featureSet(&[_]Feature{ .@"64bit", + .allow_unaligned_fp_access, .altivec, .bpermd, .cmpb, @@ -856,6 +905,7 @@ pub const cpu = struct { .llvm_name = "pwr9", .features = featureSet(&[_]Feature{ .@"64bit", + .allow_unaligned_fp_access, .altivec, .bpermd, .cmpb, @@ -917,6 +967,7 @@ pub const all_cpus = &[_]*const Cpu{ &cpu.e500, &cpu.e500mc, &cpu.e5500, + &cpu.future, &cpu.g3, &cpu.g4, &cpu.@"g4+", diff --git a/lib/std/target/riscv.zig b/lib/std/target/riscv.zig index 315329306e..a02f668b78 100644 --- a/lib/std/target/riscv.zig +++ b/lib/std/target/riscv.zig @@ -10,6 +10,38 @@ pub const Feature = enum { f, m, relax, + reserve_x1, + reserve_x10, + reserve_x11, + reserve_x12, + reserve_x13, + reserve_x14, + reserve_x15, + reserve_x16, + reserve_x17, + reserve_x18, + reserve_x19, + reserve_x2, + reserve_x20, + reserve_x21, + reserve_x22, + reserve_x23, + reserve_x24, + reserve_x25, + reserve_x26, + reserve_x27, + reserve_x28, + reserve_x29, + reserve_x3, + reserve_x30, + reserve_x31, + reserve_x4, + reserve_x5, + reserve_x6, + reserve_x7, + reserve_x8, + reserve_x9, + rvc_hints, }; pub usingnamespace Cpu.Feature.feature_set_fns(Feature); @@ -60,6 +92,166 @@ pub const all_features = blk: { .description = "Enable Linker relaxation.", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.reserve_x1)] = .{ + .llvm_name = "reserve-x1", + .description = "Reserve X1", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x10)] = .{ + .llvm_name = "reserve-x10", + .description = "Reserve X10", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x11)] = .{ + .llvm_name = "reserve-x11", + .description = "Reserve X11", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x12)] = .{ + .llvm_name = "reserve-x12", + .description = "Reserve X12", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x13)] = .{ + .llvm_name = "reserve-x13", + .description = "Reserve X13", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x14)] = .{ + .llvm_name = "reserve-x14", + .description = "Reserve X14", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x15)] = .{ + .llvm_name = "reserve-x15", + .description = "Reserve X15", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x16)] = .{ + .llvm_name = "reserve-x16", + .description = "Reserve X16", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x17)] = .{ + .llvm_name = "reserve-x17", + .description = "Reserve X17", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x18)] = .{ + .llvm_name = "reserve-x18", + .description = "Reserve X18", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x19)] = .{ + .llvm_name = "reserve-x19", + .description = "Reserve X19", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x2)] = .{ + .llvm_name = "reserve-x2", + .description = "Reserve X2", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x20)] = .{ + .llvm_name = "reserve-x20", + .description = "Reserve X20", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x21)] = .{ + .llvm_name = "reserve-x21", + .description = "Reserve X21", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x22)] = .{ + .llvm_name = "reserve-x22", + .description = "Reserve X22", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x23)] = .{ + .llvm_name = "reserve-x23", + .description = "Reserve X23", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x24)] = .{ + .llvm_name = "reserve-x24", + .description = "Reserve X24", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x25)] = .{ + .llvm_name = "reserve-x25", + .description = "Reserve X25", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x26)] = .{ + .llvm_name = "reserve-x26", + .description = "Reserve X26", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x27)] = .{ + .llvm_name = "reserve-x27", + .description = "Reserve X27", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x28)] = .{ + .llvm_name = "reserve-x28", + .description = "Reserve X28", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x29)] = .{ + .llvm_name = "reserve-x29", + .description = "Reserve X29", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x3)] = .{ + .llvm_name = "reserve-x3", + .description = "Reserve X3", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x30)] = .{ + .llvm_name = "reserve-x30", + .description = "Reserve X30", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x31)] = .{ + .llvm_name = "reserve-x31", + .description = "Reserve X31", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x4)] = .{ + .llvm_name = "reserve-x4", + .description = "Reserve X4", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x5)] = .{ + .llvm_name = "reserve-x5", + .description = "Reserve X5", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x6)] = .{ + .llvm_name = "reserve-x6", + .description = "Reserve X6", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x7)] = .{ + .llvm_name = "reserve-x7", + .description = "Reserve X7", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x8)] = .{ + .llvm_name = "reserve-x8", + .description = "Reserve X8", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.reserve_x9)] = .{ + .llvm_name = "reserve-x9", + .description = "Reserve X9", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.rvc_hints)] = .{ + .llvm_name = "rvc-hints", + .description = "Enable RVC Hint Instructions.", + .dependencies = featureSet(&[_]Feature{}), + }; const ti = @typeInfo(Feature); for (result) |*elem, i| { elem.index = i; @@ -99,14 +291,16 @@ pub const cpu = struct { pub const generic_rv32 = Cpu{ .name = "generic_rv32", .llvm_name = "generic-rv32", - .features = featureSet(&[_]Feature{}), + .features = featureSet(&[_]Feature{ + .rvc_hints, + }), }; - pub const generic_rv64 = Cpu{ .name = "generic_rv64", .llvm_name = "generic-rv64", .features = featureSet(&[_]Feature{ .@"64bit", + .rvc_hints, }), }; }; diff --git a/lib/std/target/systemz.zig b/lib/std/target/systemz.zig index c924af6e70..088495f42d 100644 --- a/lib/std/target/systemz.zig +++ b/lib/std/target/systemz.zig @@ -450,6 +450,47 @@ pub const cpu = struct { .vector_packed_decimal, }), }; + pub const z15 = Cpu{ + .name = "z15", + .llvm_name = "z15", + .features = featureSet(&[_]Feature{ + .deflate_conversion, + .dfp_packed_conversion, + .dfp_zoned_conversion, + .distinct_ops, + .enhanced_dat_2, + .enhanced_sort, + .execution_hint, + .fast_serialization, + .fp_extension, + .guarded_storage, + .high_word, + .insert_reference_bits_multiple, + .interlocked_access1, + .load_and_trap, + .load_and_zero_rightmost_byte, + .load_store_on_cond, + .load_store_on_cond_2, + .message_security_assist_extension3, + .message_security_assist_extension4, + .message_security_assist_extension5, + .message_security_assist_extension7, + .message_security_assist_extension8, + .message_security_assist_extension9, + .miscellaneous_extensions, + .miscellaneous_extensions_2, + .miscellaneous_extensions_3, + .population_count, + .processor_assist, + .reset_reference_bits_multiple, + .transactional_execution, + .vector, + .vector_enhancements_1, + .vector_enhancements_2, + .vector_packed_decimal, + .vector_packed_decimal_enhancement, + }), + }; pub const z196 = Cpu{ .name = "z196", .llvm_name = "z196", @@ -505,6 +546,7 @@ pub const all_cpus = &[_]*const Cpu{ &cpu.z10, &cpu.z13, &cpu.z14, + &cpu.z15, &cpu.z196, &cpu.zEC12, }; diff --git a/lib/std/target/x86.zig b/lib/std/target/x86.zig index 3c2e306e79..10d33fe6b2 100644 --- a/lib/std/target/x86.zig +++ b/lib/std/target/x86.zig @@ -2,9 +2,12 @@ const std = @import("../std.zig"); const Cpu = std.Target.Cpu; pub const Feature = enum { + @"16bit_mode", + @"32bit_mode", @"3dnow", @"3dnowa", @"64bit", + @"64bit_mode", adx, aes, avx, @@ -45,7 +48,6 @@ pub const Feature = enum { fast_gather, fast_hops, fast_lzcnt, - fast_partial_ymm_or_zmm_write, fast_scalar_fsqrt, fast_scalar_shift_masks, fast_shld_rotate, @@ -78,7 +80,9 @@ pub const Feature = enum { pconfig, pku, popcnt, + prefer_128_bit, prefer_256_bit, + prefer_mask_registers, prefetchwt1, prfchw, ptwrite, @@ -113,8 +117,11 @@ pub const Feature = enum { sse4a, ssse3, tbm, + use_aa, + use_glm_div_sqrt_costs, vaes, vpclmulqdq, + vzeroupper, waitpkg, wbnoinvd, x87, @@ -131,6 +138,16 @@ pub const all_features = blk: { const len = @typeInfo(Feature).Enum.fields.len; std.debug.assert(len <= Cpu.Feature.Set.needed_bit_count); var result: [len]Cpu.Feature = undefined; + result[@enumToInt(Feature.@"16bit_mode")] = .{ + .llvm_name = "16bit-mode", + .description = "16-bit mode (i8086)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.@"32bit_mode")] = .{ + .llvm_name = "32bit-mode", + .description = "32-bit mode (80386)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.@"3dnow")] = .{ .llvm_name = "3dnow", .description = "Enable 3DNow! instructions", @@ -150,6 +167,11 @@ pub const all_features = blk: { .description = "Support 64-bit instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.@"64bit_mode")] = .{ + .llvm_name = "64bit-mode", + .description = "64-bit mode (x86_64)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.adx)] = .{ .llvm_name = "adx", .description = "Support ADX instructions", @@ -385,20 +407,13 @@ pub const all_features = blk: { result[@enumToInt(Feature.fast_hops)] = .{ .llvm_name = "fast-hops", .description = "Prefer horizontal vector math instructions (haddp, phsub, etc.) over normal vector instructions with shuffles", - .dependencies = featureSet(&[_]Feature{ - .sse3, - }), + .dependencies = featureSet(&[_]Feature{}), }; result[@enumToInt(Feature.fast_lzcnt)] = .{ .llvm_name = "fast-lzcnt", .description = "LZCNT instructions are as fast as most simple integer ops", .dependencies = featureSet(&[_]Feature{}), }; - result[@enumToInt(Feature.fast_partial_ymm_or_zmm_write)] = .{ - .llvm_name = "fast-partial-ymm-or-zmm-write", - .description = "Partial writes to YMM/ZMM registers are fast", - .dependencies = featureSet(&[_]Feature{}), - }; result[@enumToInt(Feature.fast_scalar_fsqrt)] = .{ .llvm_name = "fast-scalar-fsqrt", .description = "Scalar SQRT is fast (disable Newton-Raphson)", @@ -528,7 +543,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.mpx)] = .{ .llvm_name = "mpx", - .description = "Support MPX instructions", + .description = "Deprecated. Support MPX instructions", .dependencies = featureSet(&[_]Feature{}), }; result[@enumToInt(Feature.mwaitx)] = .{ @@ -568,11 +583,21 @@ pub const all_features = blk: { .description = "Support POPCNT instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.prefer_128_bit)] = .{ + .llvm_name = "prefer-128-bit", + .description = "Prefer 128-bit AVX instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.prefer_256_bit)] = .{ .llvm_name = "prefer-256-bit", .description = "Prefer 256-bit AVX instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.prefer_mask_registers)] = .{ + .llvm_name = "prefer-mask-registers", + .description = "Prefer AVX512 mask registers over PTEST/MOVMSK", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.prefetchwt1)] = .{ .llvm_name = "prefetchwt1", .description = "Prefetch with Intent to Write and T1 Hint", @@ -762,6 +787,16 @@ pub const all_features = blk: { .description = "Enable TBM instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.use_aa)] = .{ + .llvm_name = "use-aa", + .description = "Use alias analysis during codegen", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.use_glm_div_sqrt_costs)] = .{ + .llvm_name = "use-glm-div-sqrt-costs", + .description = "Use Goldmont specific floating point div/sqrt costs", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.vaes)] = .{ .llvm_name = "vaes", .description = "Promote selected AES instructions to AVX512/AVX registers", @@ -778,6 +813,11 @@ pub const all_features = blk: { .pclmul, }), }; + result[@enumToInt(Feature.vzeroupper)] = .{ + .llvm_name = "vzeroupper", + .description = "Should insert vzeroupper instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.waitpkg)] = .{ .llvm_name = "waitpkg", .description = "Wait and pause enhancements", @@ -846,6 +886,7 @@ pub const cpu = struct { .sahf, .slow_shld, .sse4a, + .vzeroupper, .x87, }), }; @@ -859,6 +900,7 @@ pub const cpu = struct { .nopl, .slow_shld, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -874,6 +916,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -891,6 +934,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -906,6 +950,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -919,6 +964,7 @@ pub const cpu = struct { .nopl, .slow_shld, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -934,6 +980,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -951,6 +998,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -969,6 +1017,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -993,6 +1042,7 @@ pub const cpu = struct { .slow_two_mem_ops, .slow_unaligned_mem_16, .ssse3, + .vzeroupper, .x87, }), }; @@ -1013,6 +1063,7 @@ pub const cpu = struct { .sahf, .slow_shld, .sse4a, + .vzeroupper, .x87, }), }; @@ -1038,6 +1089,7 @@ pub const cpu = struct { .prfchw, .sahf, .slow_shld, + .vzeroupper, .x87, .xop, .xsave, @@ -1070,6 +1122,7 @@ pub const cpu = struct { .sahf, .slow_shld, .tbm, + .vzeroupper, .x87, .xop, .xsave, @@ -1103,6 +1156,7 @@ pub const cpu = struct { .sahf, .slow_shld, .tbm, + .vzeroupper, .x87, .xop, .xsave, @@ -1140,6 +1194,7 @@ pub const cpu = struct { .sahf, .slow_shld, .tbm, + .vzeroupper, .x87, .xop, .xsave, @@ -1167,6 +1222,7 @@ pub const cpu = struct { .slow_two_mem_ops, .slow_unaligned_mem_16, .ssse3, + .vzeroupper, .x87, }), }; @@ -1209,6 +1265,7 @@ pub const cpu = struct { .sahf, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsaveopt, @@ -1235,6 +1292,7 @@ pub const cpu = struct { .slow_shld, .sse4a, .ssse3, + .vzeroupper, .x87, }), }; @@ -1254,7 +1312,6 @@ pub const cpu = struct { .fast_bextr, .fast_hops, .fast_lzcnt, - .fast_partial_ymm_or_zmm_write, .fast_scalar_shift_masks, .fast_vector_shift_masks, .fxsr, @@ -1280,6 +1337,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"3dnow", .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1293,6 +1351,7 @@ pub const cpu = struct { .mmx, .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -1335,11 +1394,11 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, @@ -1348,6 +1407,7 @@ pub const cpu = struct { .sha, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsavec, @@ -1395,17 +1455,18 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsavec, @@ -1454,17 +1515,18 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsavec, @@ -1499,6 +1561,7 @@ pub const cpu = struct { .slow_3ops_lea, .slow_unaligned_mem_32, .sse4_2, + .vzeroupper, .x87, .xsave, .xsaveopt, @@ -1540,6 +1603,7 @@ pub const cpu = struct { .sahf, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsaveopt, @@ -1560,6 +1624,7 @@ pub const cpu = struct { .sahf, .slow_unaligned_mem_16, .ssse3, + .vzeroupper, .x87, }), }; @@ -1578,6 +1643,7 @@ pub const cpu = struct { .popcnt, .sahf, .sse4_2, + .vzeroupper, .x87, }), }; @@ -1605,6 +1671,7 @@ pub const cpu = struct { .slow_3ops_lea, .slow_unaligned_mem_32, .sse4_2, + .vzeroupper, .x87, .xsave, .xsaveopt, @@ -1616,6 +1683,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .cx8, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1626,6 +1694,7 @@ pub const cpu = struct { .@"3dnowa", .cx8, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1644,7 +1713,6 @@ pub const cpu = struct { .fxsr, .mmx, .movbe, - .mpx, .nopl, .pclmul, .popcnt, @@ -1658,6 +1726,8 @@ pub const cpu = struct { .slow_two_mem_ops, .sse4_2, .ssse3, + .use_glm_div_sqrt_costs, + .vzeroupper, .x87, .xsave, .xsavec, @@ -1679,7 +1749,6 @@ pub const cpu = struct { .fxsr, .mmx, .movbe, - .mpx, .nopl, .pclmul, .popcnt, @@ -1696,6 +1765,8 @@ pub const cpu = struct { .slow_two_mem_ops, .sse4_2, .ssse3, + .use_glm_div_sqrt_costs, + .vzeroupper, .x87, .xsave, .xsavec, @@ -1739,6 +1810,7 @@ pub const cpu = struct { .sahf, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsaveopt, @@ -1749,6 +1821,7 @@ pub const cpu = struct { .llvm_name = "i386", .features = featureSet(&[_]Feature{ .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1757,6 +1830,7 @@ pub const cpu = struct { .llvm_name = "i486", .features = featureSet(&[_]Feature{ .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1766,6 +1840,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .cx8, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1776,6 +1851,7 @@ pub const cpu = struct { .cmov, .cx8, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1824,11 +1900,11 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdpid, .rdrnd, @@ -1840,6 +1916,7 @@ pub const cpu = struct { .sse4_2, .vaes, .vpclmulqdq, + .vzeroupper, .x87, .xsave, .xsavec, @@ -1892,12 +1969,12 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .pconfig, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdpid, .rdrnd, @@ -1909,6 +1986,7 @@ pub const cpu = struct { .sse4_2, .vaes, .vpclmulqdq, + .vzeroupper, .wbnoinvd, .x87, .xsave, @@ -1944,6 +2022,7 @@ pub const cpu = struct { .slow_3ops_lea, .slow_unaligned_mem_32, .sse4_2, + .vzeroupper, .x87, .xsave, .xsaveopt, @@ -1956,6 +2035,7 @@ pub const cpu = struct { .cx8, .mmx, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1966,6 +2046,7 @@ pub const cpu = struct { .@"3dnow", .cx8, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1976,6 +2057,7 @@ pub const cpu = struct { .@"3dnow", .cx8, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1993,6 +2075,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2011,6 +2094,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2032,7 +2116,6 @@ pub const cpu = struct { .cx8, .f16c, .fast_gather, - .fast_partial_ymm_or_zmm_write, .fma, .fsgsbase, .fxsr, @@ -2043,6 +2126,7 @@ pub const cpu = struct { .nopl, .pclmul, .popcnt, + .prefer_mask_registers, .prefetchwt1, .prfchw, .rdrnd, @@ -2076,7 +2160,6 @@ pub const cpu = struct { .cx8, .f16c, .fast_gather, - .fast_partial_ymm_or_zmm_write, .fma, .fsgsbase, .fxsr, @@ -2087,6 +2170,7 @@ pub const cpu = struct { .nopl, .pclmul, .popcnt, + .prefer_mask_registers, .prefetchwt1, .prfchw, .rdrnd, @@ -2104,7 +2188,9 @@ pub const cpu = struct { pub const lakemont = Cpu{ .name = "lakemont", .llvm_name = "lakemont", - .features = featureSet(&[_]Feature{}), + .features = featureSet(&[_]Feature{ + .vzeroupper, + }), }; pub const nehalem = Cpu{ .name = "nehalem", @@ -2121,6 +2207,7 @@ pub const cpu = struct { .popcnt, .sahf, .sse4_2, + .vzeroupper, .x87, }), }; @@ -2137,6 +2224,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2154,6 +2242,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2172,6 +2261,7 @@ pub const cpu = struct { .slow_shld, .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2190,6 +2280,7 @@ pub const cpu = struct { .sahf, .slow_unaligned_mem_16, .sse4_1, + .vzeroupper, .x87, }), }; @@ -2199,6 +2290,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .cx8, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2213,6 +2305,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2223,6 +2316,7 @@ pub const cpu = struct { .cx8, .mmx, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2236,6 +2330,7 @@ pub const cpu = struct { .mmx, .nopl, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2250,6 +2345,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -2264,6 +2360,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -2278,6 +2375,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2292,6 +2390,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2303,6 +2402,7 @@ pub const cpu = struct { .cx8, .nopl, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2317,6 +2417,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2344,6 +2445,7 @@ pub const cpu = struct { .slow_3ops_lea, .slow_unaligned_mem_32, .sse4_2, + .vzeroupper, .x87, .xsave, .xsaveopt, @@ -2374,6 +2476,7 @@ pub const cpu = struct { .slow_two_mem_ops, .sse4_2, .ssse3, + .vzeroupper, .x87, }), }; @@ -2416,17 +2519,18 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsavec, @@ -2467,7 +2571,6 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .popcnt, @@ -2478,6 +2581,7 @@ pub const cpu = struct { .sgx, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsavec, @@ -2524,17 +2628,18 @@ pub const cpu = struct { .merge_to_threeway_branch, .mmx, .movbe, - .mpx, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, .slow_3ops_lea, .sse4_2, + .vzeroupper, .x87, .xsave, .xsavec, @@ -2567,9 +2672,83 @@ pub const cpu = struct { .slow_two_mem_ops, .sse4_2, .ssse3, + .vzeroupper, .x87, }), }; + pub const tigerlake = Cpu{ + .name = "tigerlake", + .llvm_name = "tigerlake", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .aes, + .avx, + .avx2, + .avx512bitalg, + .avx512bw, + .avx512cd, + .avx512dq, + .avx512f, + .avx512ifma, + .avx512vbmi, + .avx512vbmi2, + .avx512vl, + .avx512vnni, + .avx512vp2intersect, + .avx512vpopcntdq, + .bmi, + .bmi2, + .clflushopt, + .clwb, + .cmov, + .cx16, + .cx8, + .ermsb, + .f16c, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .merge_to_threeway_branch, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .nopl, + .pclmul, + .pku, + .popcnt, + .prefer_256_bit, + .prfchw, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .sgx, + .sha, + .shstk, + .slow_3ops_lea, + .sse4_2, + .vaes, + .vpclmulqdq, + .vzeroupper, + .x87, + .xsave, + .xsavec, + .xsaveopt, + .xsaves, + }), + }; pub const tremont = Cpu{ .name = "tremont", .llvm_name = "tremont", @@ -2588,7 +2767,6 @@ pub const cpu = struct { .movbe, .movdir64b, .movdiri, - .mpx, .nopl, .pclmul, .popcnt, @@ -2605,6 +2783,8 @@ pub const cpu = struct { .slow_two_mem_ops, .sse4_2, .ssse3, + .use_glm_div_sqrt_costs, + .vzeroupper, .waitpkg, .x87, .xsave, @@ -2629,6 +2809,7 @@ pub const cpu = struct { .popcnt, .sahf, .sse4_2, + .vzeroupper, .x87, }), }; @@ -2638,6 +2819,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .mmx, .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2647,6 +2829,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"3dnow", .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2664,6 +2847,7 @@ pub const cpu = struct { .slow_3ops_lea, .slow_incdec, .sse2, + .vzeroupper, .x87, }), }; @@ -2678,6 +2862,7 @@ pub const cpu = struct { .nopl, .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2718,6 +2903,7 @@ pub const cpu = struct { .sha, .slow_shld, .sse4a, + .vzeroupper, .x87, .xsave, .xsavec, @@ -2764,6 +2950,7 @@ pub const cpu = struct { .sha, .slow_shld, .sse4a, + .vzeroupper, .wbnoinvd, .x87, .xsave, @@ -2848,6 +3035,7 @@ pub const all_cpus = &[_]*const Cpu{ &cpu.skylake, &cpu.skylake_avx512, &cpu.slm, + &cpu.tigerlake, &cpu.tremont, &cpu.westmere, &cpu.winchip_c6,