mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 04:48:20 +00:00
std.zig.system.x86: Synchronize CPUID/XGETBV checks with LLVM 19.
Also fix a bunch of cases where we didn't toggle features off if the relevant leaf isn't available, and switch XCR0 checks to a packed struct. Closes #23385.
This commit is contained in:
parent
de62dc884e
commit
7289a073e5
@ -2,11 +2,30 @@ const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const Target = std.Target;
|
||||
|
||||
const XCR0_XMM = 0x02;
|
||||
const XCR0_YMM = 0x04;
|
||||
const XCR0_MASKREG = 0x20;
|
||||
const XCR0_ZMM0_15 = 0x40;
|
||||
const XCR0_ZMM16_31 = 0x80;
|
||||
/// Only covers EAX for now.
|
||||
const Xcr0 = packed struct(u32) {
|
||||
x87: bool,
|
||||
sse: bool,
|
||||
avx: bool,
|
||||
bndreg: bool,
|
||||
bndcsr: bool,
|
||||
opmask: bool,
|
||||
zmm_hi256: bool,
|
||||
hi16_zmm: bool,
|
||||
pt: bool,
|
||||
pkru: bool,
|
||||
pasid: bool,
|
||||
cet_u: bool,
|
||||
cet_s: bool,
|
||||
hdc: bool,
|
||||
uintr: bool,
|
||||
lbr: bool,
|
||||
hwp: bool,
|
||||
xtilecfg: bool,
|
||||
xtiledata: bool,
|
||||
apx: bool,
|
||||
_reserved: u12,
|
||||
};
|
||||
|
||||
fn setFeature(cpu: *Target.Cpu, feature: Target.x86.Feature, enabled: bool) void {
|
||||
const idx = @as(Target.Cpu.Feature.Set.Index, @intFromEnum(feature));
|
||||
@ -339,12 +358,6 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
|
||||
leaf = cpuid(1, 0);
|
||||
|
||||
setFeature(cpu, .cx8, bit(leaf.edx, 8));
|
||||
setFeature(cpu, .cmov, bit(leaf.edx, 15));
|
||||
setFeature(cpu, .mmx, bit(leaf.edx, 23));
|
||||
setFeature(cpu, .fxsr, bit(leaf.edx, 24));
|
||||
setFeature(cpu, .sse, bit(leaf.edx, 25));
|
||||
setFeature(cpu, .sse2, bit(leaf.edx, 26));
|
||||
setFeature(cpu, .sse3, bit(leaf.ecx, 0));
|
||||
setFeature(cpu, .pclmul, bit(leaf.ecx, 1));
|
||||
setFeature(cpu, .ssse3, bit(leaf.ecx, 9));
|
||||
@ -356,13 +369,20 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
setFeature(cpu, .aes, bit(leaf.ecx, 25));
|
||||
setFeature(cpu, .rdrnd, bit(leaf.ecx, 30));
|
||||
|
||||
setFeature(cpu, .cx8, bit(leaf.edx, 8));
|
||||
setFeature(cpu, .cmov, bit(leaf.edx, 15));
|
||||
setFeature(cpu, .mmx, bit(leaf.edx, 23));
|
||||
setFeature(cpu, .fxsr, bit(leaf.edx, 24));
|
||||
setFeature(cpu, .sse, bit(leaf.edx, 25));
|
||||
setFeature(cpu, .sse2, bit(leaf.edx, 26));
|
||||
|
||||
const has_xsave = bit(leaf.ecx, 27);
|
||||
const has_avx = bit(leaf.ecx, 28);
|
||||
|
||||
// Make sure not to call xgetbv if xsave is not supported
|
||||
const xcr0_eax = if (has_xsave and has_avx) getXCR0() else 0;
|
||||
const xcr0: Xcr0 = if (has_xsave and has_avx) @bitCast(getXCR0()) else @bitCast(@as(u32, 0));
|
||||
|
||||
const has_avx_save = hasMask(xcr0_eax, XCR0_XMM | XCR0_YMM);
|
||||
const has_avx_save = xcr0.sse and xcr0.avx;
|
||||
|
||||
// LLVM approaches avx512_save by hardcoding it to true on Darwin,
|
||||
// because the kernel saves the context even if the bit is not set.
|
||||
@ -384,22 +404,26 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
// Darwin lazily saves the AVX512 context on first use: trust that the OS will
|
||||
// save the AVX512 context if we use AVX512 instructions, even if the bit is not
|
||||
// set right now.
|
||||
const has_avx512_save = switch (os_tag.isDarwin()) {
|
||||
true => true,
|
||||
false => hasMask(xcr0_eax, XCR0_MASKREG | XCR0_ZMM0_15 | XCR0_ZMM16_31),
|
||||
};
|
||||
const has_avx512_save = if (os_tag.isDarwin())
|
||||
true
|
||||
else
|
||||
xcr0.zmm_hi256 and xcr0.hi16_zmm;
|
||||
|
||||
// AMX requires additional context to be saved by the OS.
|
||||
const has_amx_save = xcr0.xtilecfg and xcr0.xtiledata;
|
||||
|
||||
setFeature(cpu, .avx, has_avx_save);
|
||||
setFeature(cpu, .fma, has_avx_save and bit(leaf.ecx, 12));
|
||||
setFeature(cpu, .fma, bit(leaf.ecx, 12) and has_avx_save);
|
||||
// Only enable XSAVE if OS has enabled support for saving YMM state.
|
||||
setFeature(cpu, .xsave, has_avx_save and bit(leaf.ecx, 26));
|
||||
setFeature(cpu, .f16c, has_avx_save and bit(leaf.ecx, 29));
|
||||
setFeature(cpu, .xsave, bit(leaf.ecx, 26) and has_avx_save);
|
||||
setFeature(cpu, .f16c, bit(leaf.ecx, 29) and has_avx_save);
|
||||
|
||||
leaf = cpuid(0x80000000, 0);
|
||||
const max_ext_level = leaf.eax;
|
||||
|
||||
if (max_ext_level >= 0x80000001) {
|
||||
leaf = cpuid(0x80000001, 0);
|
||||
|
||||
setFeature(cpu, .sahf, bit(leaf.ecx, 0));
|
||||
setFeature(cpu, .lzcnt, bit(leaf.ecx, 5));
|
||||
setFeature(cpu, .sse4a, bit(leaf.ecx, 6));
|
||||
@ -409,11 +433,21 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
setFeature(cpu, .fma4, bit(leaf.ecx, 16) and has_avx_save);
|
||||
setFeature(cpu, .tbm, bit(leaf.ecx, 21));
|
||||
setFeature(cpu, .mwaitx, bit(leaf.ecx, 29));
|
||||
|
||||
setFeature(cpu, .@"64bit", bit(leaf.edx, 29));
|
||||
} else {
|
||||
for ([_]Target.x86.Feature{
|
||||
.sahf, .lzcnt, .sse4a, .prfchw, .xop,
|
||||
.lwp, .fma4, .tbm, .mwaitx, .@"64bit",
|
||||
.sahf,
|
||||
.lzcnt,
|
||||
.sse4a,
|
||||
.prfchw,
|
||||
.xop,
|
||||
.lwp,
|
||||
.fma4,
|
||||
.tbm,
|
||||
.mwaitx,
|
||||
|
||||
.@"64bit",
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
@ -422,10 +456,16 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
// Misc. memory-related features.
|
||||
if (max_ext_level >= 0x80000008) {
|
||||
leaf = cpuid(0x80000008, 0);
|
||||
|
||||
setFeature(cpu, .clzero, bit(leaf.ebx, 0));
|
||||
setFeature(cpu, .rdpru, bit(leaf.ebx, 4));
|
||||
setFeature(cpu, .wbnoinvd, bit(leaf.ebx, 9));
|
||||
} else {
|
||||
for ([_]Target.x86.Feature{ .clzero, .wbnoinvd }) |feat| {
|
||||
for ([_]Target.x86.Feature{
|
||||
.clzero,
|
||||
.rdpru,
|
||||
.wbnoinvd,
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
}
|
||||
@ -444,6 +484,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
setFeature(cpu, .rtm, bit(leaf.ebx, 11));
|
||||
// AVX512 is only supported if the OS supports the context save for it.
|
||||
setFeature(cpu, .avx512f, bit(leaf.ebx, 16) and has_avx512_save);
|
||||
setFeature(cpu, .evex512, bit(leaf.ebx, 16) and has_avx512_save);
|
||||
setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save);
|
||||
setFeature(cpu, .rdseed, bit(leaf.ebx, 18));
|
||||
setFeature(cpu, .adx, bit(leaf.ebx, 19));
|
||||
@ -470,8 +511,8 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
setFeature(cpu, .avx512vnni, bit(leaf.ecx, 11) and has_avx512_save);
|
||||
setFeature(cpu, .avx512bitalg, bit(leaf.ecx, 12) and has_avx512_save);
|
||||
setFeature(cpu, .avx512vpopcntdq, bit(leaf.ecx, 14) and has_avx512_save);
|
||||
setFeature(cpu, .avx512vp2intersect, bit(leaf.edx, 8) and has_avx512_save);
|
||||
setFeature(cpu, .rdpid, bit(leaf.ecx, 22));
|
||||
setFeature(cpu, .kl, bit(leaf.ecx, 23));
|
||||
setFeature(cpu, .cldemote, bit(leaf.ecx, 25));
|
||||
setFeature(cpu, .movdiri, bit(leaf.ecx, 27));
|
||||
setFeature(cpu, .movdir64b, bit(leaf.ecx, 28));
|
||||
@ -487,32 +528,153 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
// leaves using cpuid, since that information is ignored while
|
||||
// detecting features using the "-march=native" flag.
|
||||
// For more info, see X86 ISA docs.
|
||||
setFeature(cpu, .pconfig, bit(leaf.edx, 18));
|
||||
setFeature(cpu, .uintr, bit(leaf.edx, 5));
|
||||
setFeature(cpu, .avx512vp2intersect, bit(leaf.edx, 8) and has_avx512_save);
|
||||
setFeature(cpu, .serialize, bit(leaf.edx, 14));
|
||||
setFeature(cpu, .tsxldtrk, bit(leaf.edx, 16));
|
||||
setFeature(cpu, .pconfig, bit(leaf.edx, 18));
|
||||
setFeature(cpu, .amx_bf16, bit(leaf.edx, 22) and has_amx_save);
|
||||
setFeature(cpu, .avx512fp16, bit(leaf.edx, 23) and has_avx512_save);
|
||||
setFeature(cpu, .amx_tile, bit(leaf.edx, 24) and has_amx_save);
|
||||
setFeature(cpu, .amx_int8, bit(leaf.edx, 25) and has_amx_save);
|
||||
|
||||
// TODO I feel unsure about this check.
|
||||
// It doesn't really seem to check for 7.1, just for 7.
|
||||
// Is this a sound assumption to make?
|
||||
// Note that this is what other implementations do, so I kind of trust it.
|
||||
const has_leaf_7_1 = max_level >= 7;
|
||||
if (has_leaf_7_1) {
|
||||
if (leaf.eax >= 1) {
|
||||
leaf = cpuid(0x7, 0x1);
|
||||
|
||||
setFeature(cpu, .sha512, bit(leaf.eax, 0));
|
||||
setFeature(cpu, .sm3, bit(leaf.eax, 1));
|
||||
setFeature(cpu, .sm4, bit(leaf.eax, 2));
|
||||
setFeature(cpu, .raoint, bit(leaf.eax, 3));
|
||||
setFeature(cpu, .avxvnni, bit(leaf.eax, 4) and has_avx_save);
|
||||
setFeature(cpu, .avx512bf16, bit(leaf.eax, 5) and has_avx512_save);
|
||||
setFeature(cpu, .cmpccxadd, bit(leaf.eax, 7));
|
||||
setFeature(cpu, .amx_fp16, bit(leaf.eax, 21) and has_amx_save);
|
||||
setFeature(cpu, .hreset, bit(leaf.eax, 22));
|
||||
setFeature(cpu, .avxifma, bit(leaf.eax, 23) and has_avx_save);
|
||||
|
||||
setFeature(cpu, .avxvnniint8, bit(leaf.edx, 4) and has_avx_save);
|
||||
setFeature(cpu, .avxneconvert, bit(leaf.edx, 5) and has_avx_save);
|
||||
setFeature(cpu, .amx_complex, bit(leaf.edx, 8) and has_amx_save);
|
||||
setFeature(cpu, .avxvnniint16, bit(leaf.edx, 10) and has_avx_save);
|
||||
setFeature(cpu, .prefetchi, bit(leaf.edx, 14));
|
||||
setFeature(cpu, .usermsr, bit(leaf.edx, 15));
|
||||
setFeature(cpu, .avx10_1_256, bit(leaf.edx, 19));
|
||||
// APX
|
||||
setFeature(cpu, .egpr, bit(leaf.edx, 21));
|
||||
setFeature(cpu, .push2pop2, bit(leaf.edx, 21));
|
||||
setFeature(cpu, .ppx, bit(leaf.edx, 21));
|
||||
setFeature(cpu, .ndd, bit(leaf.edx, 21));
|
||||
setFeature(cpu, .ccmp, bit(leaf.edx, 21));
|
||||
setFeature(cpu, .cf, bit(leaf.edx, 21));
|
||||
} else {
|
||||
setFeature(cpu, .avx512bf16, false);
|
||||
for ([_]Target.x86.Feature{
|
||||
.sha512,
|
||||
.sm3,
|
||||
.sm4,
|
||||
.raoint,
|
||||
.avxvnni,
|
||||
.avx512bf16,
|
||||
.cmpccxadd,
|
||||
.amx_fp16,
|
||||
.hreset,
|
||||
.avxifma,
|
||||
|
||||
.avxvnniint8,
|
||||
.avxneconvert,
|
||||
.amx_complex,
|
||||
.avxvnniint16,
|
||||
.prefetchi,
|
||||
.usermsr,
|
||||
.avx10_1_256,
|
||||
.egpr,
|
||||
.push2pop2,
|
||||
.ppx,
|
||||
.ndd,
|
||||
.ccmp,
|
||||
.cf,
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for ([_]Target.x86.Feature{
|
||||
.fsgsbase, .sgx, .bmi, .avx2,
|
||||
.bmi2, .invpcid, .rtm, .avx512f,
|
||||
.avx512dq, .rdseed, .adx, .avx512ifma,
|
||||
.clflushopt, .clwb, .avx512pf, .avx512er,
|
||||
.avx512cd, .sha, .avx512bw, .avx512vl,
|
||||
.prefetchwt1, .avx512vbmi, .pku, .waitpkg,
|
||||
.avx512vbmi2, .shstk, .gfni, .vaes,
|
||||
.vpclmulqdq, .avx512vnni, .avx512bitalg, .avx512vpopcntdq,
|
||||
.avx512vp2intersect, .rdpid, .cldemote, .movdiri,
|
||||
.movdir64b, .enqcmd, .pconfig, .avx512bf16,
|
||||
.fsgsbase,
|
||||
.sgx,
|
||||
.bmi,
|
||||
.avx2,
|
||||
.smep,
|
||||
.bmi2,
|
||||
.invpcid,
|
||||
.rtm,
|
||||
.avx512f,
|
||||
.evex512,
|
||||
.avx512dq,
|
||||
.rdseed,
|
||||
.adx,
|
||||
.smap,
|
||||
.avx512ifma,
|
||||
.clflushopt,
|
||||
.clwb,
|
||||
.avx512pf,
|
||||
.avx512er,
|
||||
.avx512cd,
|
||||
.sha,
|
||||
.avx512bw,
|
||||
.avx512vl,
|
||||
|
||||
.prefetchwt1,
|
||||
.avx512vbmi,
|
||||
.pku,
|
||||
.waitpkg,
|
||||
.avx512vbmi2,
|
||||
.shstk,
|
||||
.gfni,
|
||||
.vaes,
|
||||
.vpclmulqdq,
|
||||
.avx512vnni,
|
||||
.avx512bitalg,
|
||||
.avx512vpopcntdq,
|
||||
.rdpid,
|
||||
.kl,
|
||||
.cldemote,
|
||||
.movdiri,
|
||||
.movdir64b,
|
||||
.enqcmd,
|
||||
|
||||
.uintr,
|
||||
.avx512vp2intersect,
|
||||
.serialize,
|
||||
.tsxldtrk,
|
||||
.pconfig,
|
||||
.amx_bf16,
|
||||
.avx512fp16,
|
||||
.amx_tile,
|
||||
.amx_int8,
|
||||
|
||||
.sha512,
|
||||
.sm3,
|
||||
.sm4,
|
||||
.raoint,
|
||||
.avxvnni,
|
||||
.avx512bf16,
|
||||
.cmpccxadd,
|
||||
.amx_fp16,
|
||||
.hreset,
|
||||
.avxifma,
|
||||
|
||||
.avxvnniint8,
|
||||
.avxneconvert,
|
||||
.amx_complex,
|
||||
.avxvnniint16,
|
||||
.prefetchi,
|
||||
.usermsr,
|
||||
.avx10_1_256,
|
||||
.egpr,
|
||||
.push2pop2,
|
||||
.ppx,
|
||||
.ndd,
|
||||
.ccmp,
|
||||
.cf,
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
@ -520,21 +682,55 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
|
||||
|
||||
if (max_level >= 0xD and has_avx_save) {
|
||||
leaf = cpuid(0xD, 0x1);
|
||||
|
||||
// Only enable XSAVE if OS has enabled support for saving YMM state.
|
||||
setFeature(cpu, .xsaveopt, bit(leaf.eax, 0));
|
||||
setFeature(cpu, .xsavec, bit(leaf.eax, 1));
|
||||
setFeature(cpu, .xsaves, bit(leaf.eax, 3));
|
||||
} else {
|
||||
for ([_]Target.x86.Feature{ .xsaveopt, .xsavec, .xsaves }) |feat| {
|
||||
for ([_]Target.x86.Feature{
|
||||
.xsaveopt,
|
||||
.xsavec,
|
||||
.xsaves,
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (max_level >= 0x14) {
|
||||
leaf = cpuid(0x14, 0);
|
||||
|
||||
setFeature(cpu, .ptwrite, bit(leaf.ebx, 4));
|
||||
} else {
|
||||
setFeature(cpu, .ptwrite, false);
|
||||
for ([_]Target.x86.Feature{
|
||||
.ptwrite,
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (max_level >= 0x19) {
|
||||
leaf = cpuid(0x19, 0);
|
||||
|
||||
setFeature(cpu, .widekl, bit(leaf.ebx, 2));
|
||||
} else {
|
||||
for ([_]Target.x86.Feature{
|
||||
.widekl,
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (max_level >= 0x24) {
|
||||
leaf = cpuid(0x24, 0);
|
||||
|
||||
setFeature(cpu, .avx10_1_512, bit(leaf.ebx, 18));
|
||||
} else {
|
||||
for ([_]Target.x86.Feature{
|
||||
.avx10_1_512,
|
||||
}) |feat| {
|
||||
setFeature(cpu, feat, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user