From 1f61a00a419e814c619ccdc5a93177752a4f6ddd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Nov 2022 21:36:56 +0100 Subject: [PATCH] Merge pull request #13659 from ziglang/arm-win-cpu-features windows: add native CPU and features detection for Armv8 chips --- lib/std/os/windows.zig | 441 +++++++++++++++++++++++- lib/std/os/windows/kernel32.zig | 12 + lib/std/os/windows/ntdll.zig | 16 + lib/std/zig/system/NativeTargetInfo.zig | 1 + lib/std/zig/system/arm.zig | 134 +++++++ lib/std/zig/system/linux.zig | 131 +------ lib/std/zig/system/windows.zig | 321 +++++++++++++++++ 7 files changed, 931 insertions(+), 125 deletions(-) create mode 100644 lib/std/zig/system/arm.zig diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index f5a5a45156..ce14a253c5 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2089,6 +2089,7 @@ pub const LPWSTR = [*:0]WCHAR; pub const LPCWSTR = [*:0]const WCHAR; pub const PVOID = *anyopaque; pub const PWSTR = [*:0]WCHAR; +pub const PCWSTR = [*:0]const WCHAR; pub const SIZE_T = usize; pub const UINT = c_uint; pub const ULONG_PTR = usize; @@ -2104,6 +2105,7 @@ pub const USHORT = u16; pub const SHORT = i16; pub const ULONG = u32; pub const LONG = i32; +pub const ULONG64 = u64; pub const ULONGLONG = u64; pub const LONGLONG = i64; pub const HLOCAL = HANDLE; @@ -2504,6 +2506,7 @@ pub const STANDARD_RIGHTS_READ = READ_CONTROL; pub const STANDARD_RIGHTS_WRITE = READ_CONTROL; pub const STANDARD_RIGHTS_EXECUTE = READ_CONTROL; pub const STANDARD_RIGHTS_REQUIRED = DELETE | READ_CONTROL | WRITE_DAC | WRITE_OWNER; +pub const MAXIMUM_ALLOWED = 0x02000000; // disposition for NtCreateFile pub const FILE_SUPERSEDE = 0; @@ -2872,9 +2875,143 @@ pub const PROV_RSA_FULL = 1; pub const REGSAM = ACCESS_MASK; pub const ACCESS_MASK = DWORD; -pub const HKEY = *opaque {}; pub const LSTATUS = LONG; +pub const HKEY = *opaque {}; + +pub const HKEY_LOCAL_MACHINE: HKEY = @intToPtr(HKEY, 0x80000002); + +/// Combines the 
STANDARD_RIGHTS_REQUIRED, KEY_QUERY_VALUE, KEY_SET_VALUE, KEY_CREATE_SUB_KEY, +/// KEY_ENUMERATE_SUB_KEYS, KEY_NOTIFY, and KEY_CREATE_LINK access rights. +pub const KEY_ALL_ACCESS = 0xF003F; +/// Reserved for system use. +pub const KEY_CREATE_LINK = 0x0020; +/// Required to create a subkey of a registry key. +pub const KEY_CREATE_SUB_KEY = 0x0004; +/// Required to enumerate the subkeys of a registry key. +pub const KEY_ENUMERATE_SUB_KEYS = 0x0008; +/// Equivalent to KEY_READ. +pub const KEY_EXECUTE = 0x20019; +/// Required to request change notifications for a registry key or for subkeys of a registry key. +pub const KEY_NOTIFY = 0x0010; +/// Required to query the values of a registry key. +pub const KEY_QUERY_VALUE = 0x0001; +/// Combines the STANDARD_RIGHTS_READ, KEY_QUERY_VALUE, KEY_ENUMERATE_SUB_KEYS, and KEY_NOTIFY values. +pub const KEY_READ = 0x20019; +/// Required to create, delete, or set a registry value. +pub const KEY_SET_VALUE = 0x0002; +/// Indicates that an application on 64-bit Windows should operate on the 32-bit registry view. +/// This flag is ignored by 32-bit Windows. +pub const KEY_WOW64_32KEY = 0x0200; +/// Indicates that an application on 64-bit Windows should operate on the 64-bit registry view. +/// This flag is ignored by 32-bit Windows. +pub const KEY_WOW64_64KEY = 0x0100; +/// Combines the STANDARD_RIGHTS_WRITE, KEY_SET_VALUE, and KEY_CREATE_SUB_KEY access rights. +pub const KEY_WRITE = 0x20006; + +/// Open symbolic link. 
+pub const REG_OPTION_OPEN_LINK: DWORD = 0x8; + +pub const RTL_QUERY_REGISTRY_TABLE = extern struct { + QueryRoutine: RTL_QUERY_REGISTRY_ROUTINE, + Flags: ULONG, + Name: ?PWSTR, + EntryContext: ?*anyopaque, + DefaultType: ULONG, + DefaultData: ?*anyopaque, + DefaultLength: ULONG, +}; + +pub const RTL_QUERY_REGISTRY_ROUTINE = ?std.meta.FnPtr(fn ( + PWSTR, + ULONG, + ?*anyopaque, + ULONG, + ?*anyopaque, + ?*anyopaque, +) callconv(WINAPI) NTSTATUS); + +/// Path is a full path +pub const RTL_REGISTRY_ABSOLUTE = 0; +/// \Registry\Machine\System\CurrentControlSet\Services +pub const RTL_REGISTRY_SERVICES = 1; +/// \Registry\Machine\System\CurrentControlSet\Control +pub const RTL_REGISTRY_CONTROL = 2; +/// \Registry\Machine\Software\Microsoft\Windows NT\CurrentVersion +pub const RTL_REGISTRY_WINDOWS_NT = 3; +/// \Registry\Machine\Hardware\DeviceMap +pub const RTL_REGISTRY_DEVICEMAP = 4; +/// \Registry\User\CurrentUser +pub const RTL_REGISTRY_USER = 5; +pub const RTL_REGISTRY_MAXIMUM = 6; + +/// Low order bits are registry handle +pub const RTL_REGISTRY_HANDLE = 0x40000000; +/// Indicates the key node is optional +pub const RTL_REGISTRY_OPTIONAL = 0x80000000; + +/// Name is a subkey and remainder of table or until next subkey are value +/// names for that subkey to look at. +pub const RTL_QUERY_REGISTRY_SUBKEY = 0x00000001; + +/// Reset current key to original key for this and all following table entries. +pub const RTL_QUERY_REGISTRY_TOPKEY = 0x00000002; + +/// Fail if no match found for this table entry. +pub const RTL_QUERY_REGISTRY_REQUIRED = 0x00000004; + +/// Used to mark a table entry that has no value name, just wants a call out, not +/// an enumeration of all values. +pub const RTL_QUERY_REGISTRY_NOVALUE = 0x00000008; + +/// Used to suppress the expansion of REG_MULTI_SZ into multiple callouts or +/// to prevent the expansion of environment variable values in REG_EXPAND_SZ. +pub const RTL_QUERY_REGISTRY_NOEXPAND = 0x00000010; + +/// QueryRoutine field ignored. 
EntryContext field points to location to store value. +/// For null terminated strings, EntryContext points to a UNICODE_STRING structure +/// that describes the maximum size of the buffer. If the .Buffer field is NULL then a buffer is +/// allocated. +pub const RTL_QUERY_REGISTRY_DIRECT = 0x00000020; + +/// Used to delete value keys after they are queried. +pub const RTL_QUERY_REGISTRY_DELETE = 0x00000040; + +/// Use this flag with the RTL_QUERY_REGISTRY_DIRECT flag to verify that the REG_XXX type +/// of the stored registry value matches the type expected by the caller. +/// If the types do not match, the call fails. +pub const RTL_QUERY_REGISTRY_TYPECHECK = 0x00000100; + +pub const REG = struct { + /// No value type + pub const NONE: ULONG = 0; + /// Unicode nul terminated string + pub const SZ: ULONG = 1; + /// Unicode nul terminated string (with environment variable references) + pub const EXPAND_SZ: ULONG = 2; + /// Free form binary + pub const BINARY: ULONG = 3; + /// 32-bit number + pub const DWORD: ULONG = 4; + /// 32-bit number (same as REG_DWORD) + pub const DWORD_LITTLE_ENDIAN: ULONG = 4; + /// 32-bit number + pub const DWORD_BIG_ENDIAN: ULONG = 5; + /// Symbolic Link (unicode) + pub const LINK: ULONG = 6; + /// Multiple Unicode strings + pub const MULTI_SZ: ULONG = 7; + /// Resource list in the resource map + pub const RESOURCE_LIST: ULONG = 8; + /// Resource list in the hardware description + pub const FULL_RESOURCE_DESCRIPTOR: ULONG = 9; + pub const RESOURCE_REQUIREMENTS_LIST: ULONG = 10; + /// 64-bit number + pub const QWORD: ULONG = 11; + /// 64-bit number (same as REG_QWORD) + pub const QWORD_LITTLE_ENDIAN: ULONG = 11; +}; + pub const FILE_NOTIFY_INFORMATION = extern struct { NextEntryOffset: DWORD, Action: DWORD, @@ -3715,3 +3852,305 @@ pub const CTRL_LOGOFF_EVENT: DWORD = 5; pub const CTRL_SHUTDOWN_EVENT: DWORD = 6; pub const HANDLER_ROUTINE = std.meta.FnPtr(fn (dwCtrlType: DWORD) callconv(WINAPI) BOOL); + +/// Processor feature enumeration. 
+pub const PF = enum(DWORD) { + /// On a Pentium, a floating-point precision error can occur in rare circumstances. + FLOATING_POINT_PRECISION_ERRATA = 0, + + /// Floating-point operations are emulated using a software emulator. + /// This function returns a nonzero value if floating-point operations are emulated; otherwise, it returns zero. + FLOATING_POINT_EMULATED = 1, + + /// The atomic compare and exchange operation (cmpxchg) is available. + COMPARE_EXCHANGE_DOUBLE = 2, + + /// The MMX instruction set is available. + MMX_INSTRUCTIONS_AVAILABLE = 3, + + PPC_MOVEMEM_64BIT_OK = 4, + ALPHA_BYTE_INSTRUCTIONS = 5, + + /// The SSE instruction set is available. + XMMI_INSTRUCTIONS_AVAILABLE = 6, + + /// The 3D-Now instruction set is available. + @"3DNOW_INSTRUCTIONS_AVAILABLE" = 7, + + /// The RDTSC instruction is available. + RDTSC_INSTRUCTION_AVAILABLE = 8, + + /// The processor is PAE-enabled. + PAE_ENABLED = 9, + + /// The SSE2 instruction set is available. + XMMI64_INSTRUCTIONS_AVAILABLE = 10, + + SSE_DAZ_MODE_AVAILABLE = 11, + + /// Data execution prevention is enabled. + NX_ENABLED = 12, + + /// The SSE3 instruction set is available. + SSE3_INSTRUCTIONS_AVAILABLE = 13, + + /// The atomic compare and exchange 128-bit operation (cmpxchg16b) is available. + COMPARE_EXCHANGE128 = 14, + + /// The atomic compare 64 and exchange 128-bit operation (cmp8xchg16) is available. + COMPARE64_EXCHANGE128 = 15, + + /// The processor channels are enabled. + CHANNELS_ENABLED = 16, + + /// The processor implements the XSAVE and XRSTOR instructions. + XSAVE_ENABLED = 17, + + /// The VFP/Neon: 32 x 64bit register bank is present. + /// This flag has the same meaning as PF_ARM_VFP_EXTENDED_REGISTERS. + ARM_VFP_32_REGISTERS_AVAILABLE = 18, + + /// This ARM processor implements the ARM v8 NEON instruction set. + ARM_NEON_INSTRUCTIONS_AVAILABLE = 19, + + /// Second Level Address Translation is supported by the hardware. 
+ SECOND_LEVEL_ADDRESS_TRANSLATION = 20, + + /// Virtualization is enabled in the firmware and made available by the operating system. + VIRT_FIRMWARE_ENABLED = 21, + + /// RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE instructions are available. + RDWRFSGBASE_AVAILABLE = 22, + + /// _fastfail() is available. + FASTFAIL_AVAILABLE = 23, + + /// The divide instructions are available. + ARM_DIVIDE_INSTRUCTION_AVAILABLE = 24, + + /// The 64-bit load/store atomic instructions are available. + ARM_64BIT_LOADSTORE_ATOMIC = 25, + + /// The external cache is available. + ARM_EXTERNAL_CACHE_AVAILABLE = 26, + + /// The floating-point multiply-accumulate instruction is available. + ARM_FMAC_INSTRUCTIONS_AVAILABLE = 27, + + RDRAND_INSTRUCTION_AVAILABLE = 28, + + /// This ARM processor implements the ARM v8 instruction set. + ARM_V8_INSTRUCTIONS_AVAILABLE = 29, + + /// This ARM processor implements the ARM v8 extra cryptographic instructions (i.e., AES, SHA1 and SHA2). + ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE = 30, + + /// This ARM processor implements the ARM v8 extra CRC32 instructions. + ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31, + + RDTSCP_INSTRUCTION_AVAILABLE = 32, + RDPID_INSTRUCTION_AVAILABLE = 33, + + /// This ARM processor implements the ARM v8.1 atomic instructions (e.g., CAS, SWP). + ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34, + + MONITORX_INSTRUCTION_AVAILABLE = 35, + + /// The SSSE3 instruction set is available. + SSSE3_INSTRUCTIONS_AVAILABLE = 36, + + /// The SSE4_1 instruction set is available. + SSE4_1_INSTRUCTIONS_AVAILABLE = 37, + + /// The SSE4_2 instruction set is available. + SSE4_2_INSTRUCTIONS_AVAILABLE = 38, + + /// The AVX instruction set is available. + AVX_INSTRUCTIONS_AVAILABLE = 39, + + /// The AVX2 instruction set is available. + AVX2_INSTRUCTIONS_AVAILABLE = 40, + + /// The AVX512F instruction set is available. + AVX512F_INSTRUCTIONS_AVAILABLE = 41, + + ERMS_AVAILABLE = 42, + + /// This ARM processor implements the ARM v8.2 Dot Product (DP) instructions. 
+ ARM_V82_DP_INSTRUCTIONS_AVAILABLE = 43, + + /// This ARM processor implements the ARM v8.3 JavaScript conversion (JSCVT) instructions. + ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44, +}; + +pub const MAX_WOW64_SHARED_ENTRIES = 16; +pub const PROCESSOR_FEATURE_MAX = 64; +pub const MAXIMUM_XSTATE_FEATURES = 64; + +pub const KSYSTEM_TIME = extern struct { + LowPart: ULONG, + High1Time: LONG, + High2Time: LONG, +}; + +pub const NT_PRODUCT_TYPE = enum(INT) { + NtProductWinNt = 1, + NtProductLanManNt, + NtProductServer, +}; + +pub const ALTERNATIVE_ARCHITECTURE_TYPE = enum(INT) { + StandardDesign, + NEC98x86, + EndAlternatives, +}; + +pub const XSTATE_FEATURE = extern struct { + Offset: ULONG, + Size: ULONG, +}; + +pub const XSTATE_CONFIGURATION = extern struct { + EnabledFeatures: ULONG64, + Size: ULONG, + OptimizedSave: ULONG, + Features: [MAXIMUM_XSTATE_FEATURES]XSTATE_FEATURE, +}; + +/// Shared Kernel User Data +pub const KUSER_SHARED_DATA = extern struct { + TickCountLowDeprecated: ULONG, + TickCountMultiplier: ULONG, + InterruptTime: KSYSTEM_TIME, + SystemTime: KSYSTEM_TIME, + TimeZoneBias: KSYSTEM_TIME, + ImageNumberLow: USHORT, + ImageNumberHigh: USHORT, + NtSystemRoot: [260]WCHAR, + MaxStackTraceDepth: ULONG, + CryptoExponent: ULONG, + TimeZoneId: ULONG, + LargePageMinimum: ULONG, + AitSamplingValue: ULONG, + AppCompatFlag: ULONG, + RNGSeedVersion: ULONGLONG, + GlobalValidationRunlevel: ULONG, + TimeZoneBiasStamp: LONG, + NtBuildNumber: ULONG, + NtProductType: NT_PRODUCT_TYPE, + ProductTypeIsValid: BOOLEAN, + Reserved0: [1]BOOLEAN, + NativeProcessorArchitecture: USHORT, + NtMajorVersion: ULONG, + NtMinorVersion: ULONG, + ProcessorFeatures: [PROCESSOR_FEATURE_MAX]BOOLEAN, + Reserved1: ULONG, + Reserved3: ULONG, + TimeSlip: ULONG, + AlternativeArchitecture: ALTERNATIVE_ARCHITECTURE_TYPE, + BootId: ULONG, + SystemExpirationDate: LARGE_INTEGER, + SuiteMaskY: ULONG, + KdDebuggerEnabled: BOOLEAN, + DummyUnion1: extern union { + MitigationPolicies: UCHAR, + Alt: packed 
struct { + NXSupportPolicy: u2, + SEHValidationPolicy: u2, + CurDirDevicesSkippedForDlls: u2, + Reserved: u2, + }, + }, + CyclesPerYield: USHORT, + ActiveConsoleId: ULONG, + DismountCount: ULONG, + ComPlusPackage: ULONG, + LastSystemRITEventTickCount: ULONG, + NumberOfPhysicalPages: ULONG, + SafeBootMode: BOOLEAN, + DummyUnion2: extern union { + VirtualizationFlags: UCHAR, + Alt: packed struct { + ArchStartedInEl2: u1, + QcSlIsSupported: u1, + SpareBits: u6, + }, + }, + Reserved12: [2]UCHAR, + DummyUnion3: extern union { + SharedDataFlags: ULONG, + Alt: packed struct { + DbgErrorPortPresent: u1, + DbgElevationEnabled: u1, + DbgVirtEnabled: u1, + DbgInstallerDetectEnabled: u1, + DbgLkgEnabled: u1, + DbgDynProcessorEnabled: u1, + DbgConsoleBrokerEnabled: u1, + DbgSecureBootEnabled: u1, + DbgMultiSessionSku: u1, + DbgMultiUsersInSessionSku: u1, + DbgStateSeparationEnabled: u1, + SpareBits: u21, + }, + }, + DataFlagsPad: [1]ULONG, + TestRetInstruction: ULONGLONG, + QpcFrequency: LONGLONG, + SystemCall: ULONG, + Reserved2: ULONG, + SystemCallPad: [2]ULONGLONG, + DummyUnion4: extern union { + TickCount: KSYSTEM_TIME, + TickCountQuad: ULONG64, + Alt: extern struct { + ReservedTickCountOverlay: [3]ULONG, + TickCountPad: [1]ULONG, + }, + }, + Cookie: ULONG, + CookiePad: [1]ULONG, + ConsoleSessionForegroundProcessId: LONGLONG, + TimeUpdateLock: ULONGLONG, + BaselineSystemTimeQpc: ULONGLONG, + BaselineInterruptTimeQpc: ULONGLONG, + QpcSystemTimeIncrement: ULONGLONG, + QpcInterruptTimeIncrement: ULONGLONG, + QpcSystemTimeIncrementShift: UCHAR, + QpcInterruptTimeIncrementShift: UCHAR, + UnparkedProcessorCount: USHORT, + EnclaveFeatureMask: [4]ULONG, + TelemetryCoverageRound: ULONG, + UserModeGlobalLogger: [16]USHORT, + ImageFileExecutionOptions: ULONG, + LangGenerationCount: ULONG, + Reserved4: ULONGLONG, + InterruptTimeBias: ULONGLONG, + QpcBias: ULONGLONG, + ActiveProcessorCount: ULONG, + ActiveGroupCount: UCHAR, + Reserved9: UCHAR, + DummyUnion5: extern union { + QpcData: 
USHORT, + Alt: extern struct { + QpcBypassEnabled: UCHAR, + QpcShift: UCHAR, + }, + }, + TimeZoneBiasEffectiveStart: LARGE_INTEGER, + TimeZoneBiasEffectiveEnd: LARGE_INTEGER, + XState: XSTATE_CONFIGURATION, + FeatureConfigurationChangeStamp: KSYSTEM_TIME, + Spare: ULONG, + UserPointerAuthMask: ULONG64, +}; + +/// Read-only user-mode address for the shared data. +/// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm +/// https://msrc-blog.microsoft.com/2022/04/05/randomizing-the-kuser_shared_data-structure-on-windows/ +pub const SharedUserData: *const KUSER_SHARED_DATA = @intToPtr(*const KUSER_SHARED_DATA, 0x7FFE0000); + +pub fn IsProcessorFeaturePresent(feature: PF) bool { + if (@enumToInt(feature) >= PROCESSOR_FEATURE_MAX) return false; + return SharedUserData.ProcessorFeatures[@enumToInt(feature)] == 1; +} diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 8d146def7f..e0c7b96f84 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -10,6 +10,7 @@ const DWORD = windows.DWORD; const FILE_INFO_BY_HANDLE_CLASS = windows.FILE_INFO_BY_HANDLE_CLASS; const HANDLE = windows.HANDLE; const HMODULE = windows.HMODULE; +const HKEY = windows.HKEY; const HRESULT = windows.HRESULT; const LARGE_INTEGER = windows.LARGE_INTEGER; const LPCWSTR = windows.LPCWSTR; @@ -57,6 +58,8 @@ const UCHAR = windows.UCHAR; const FARPROC = windows.FARPROC; const INIT_ONCE_FN = windows.INIT_ONCE_FN; const PMEMORY_BASIC_INFORMATION = windows.PMEMORY_BASIC_INFORMATION; +const REGSAM = windows.REGSAM; +const LSTATUS = windows.LSTATUS; pub extern "kernel32" fn AddVectoredExceptionHandler(First: c_ulong, Handler: ?VECTORED_EXCEPTION_HANDLER) callconv(WINAPI) ?*anyopaque; pub extern "kernel32" fn RemoveVectoredExceptionHandler(Handle: HANDLE) callconv(WINAPI) c_ulong; @@ -231,6 +234,7 @@ pub extern "kernel32" fn GetQueuedCompletionStatusEx( pub extern "kernel32" fn 
GetSystemInfo(lpSystemInfo: *SYSTEM_INFO) callconv(WINAPI) void; pub extern "kernel32" fn GetSystemTimeAsFileTime(*FILETIME) callconv(WINAPI) void; +pub extern "kernel32" fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) callconv(WINAPI) BOOL; pub extern "kernel32" fn HeapCreate(flOptions: DWORD, dwInitialSize: SIZE_T, dwMaximumSize: SIZE_T) callconv(WINAPI) ?HANDLE; pub extern "kernel32" fn HeapDestroy(hHeap: HANDLE) callconv(WINAPI) BOOL; @@ -411,3 +415,11 @@ pub extern "kernel32" fn SleepConditionVariableSRW( pub extern "kernel32" fn TryAcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) BOOLEAN; pub extern "kernel32" fn AcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void; pub extern "kernel32" fn ReleaseSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void; + +pub extern "kernel32" fn RegOpenKeyExW( + hkey: HKEY, + lpSubKey: LPCWSTR, + ulOptions: DWORD, + samDesired: REGSAM, + phkResult: *HKEY, +) callconv(WINAPI) LSTATUS; diff --git a/lib/std/os/windows/ntdll.zig b/lib/std/os/windows/ntdll.zig index bf9dc9bd2f..b006a785da 100644 --- a/lib/std/os/windows/ntdll.zig +++ b/lib/std/os/windows/ntdll.zig @@ -22,6 +22,8 @@ const RTL_OSVERSIONINFOW = windows.RTL_OSVERSIONINFOW; const FILE_BASIC_INFORMATION = windows.FILE_BASIC_INFORMATION; const SIZE_T = windows.SIZE_T; const CURDIR = windows.CURDIR; +const PCWSTR = windows.PCWSTR; +const RTL_QUERY_REGISTRY_TABLE = windows.RTL_QUERY_REGISTRY_TABLE; pub const THREADINFOCLASS = enum(c_int) { ThreadBasicInformation, @@ -253,3 +255,17 @@ pub extern "ntdll" fn NtUnlockFile( Length: *const LARGE_INTEGER, Key: ?*ULONG, ) callconv(WINAPI) NTSTATUS; + +pub extern "ntdll" fn NtOpenKey( + KeyHandle: *HANDLE, + DesiredAccess: ACCESS_MASK, + ObjectAttributes: OBJECT_ATTRIBUTES, +) callconv(WINAPI) NTSTATUS; + +pub extern "ntdll" fn RtlQueryRegistryValues( + RelativeTo: ULONG, + Path: PCWSTR, + QueryTable: [*]RTL_QUERY_REGISTRY_TABLE, + Context: ?*anyopaque, + Environment: ?*anyopaque, +) callconv(WINAPI) NTSTATUS; diff --git 
a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index c7b3f73f89..cae45af64b 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -978,6 +978,7 @@ fn detectNativeCpuAndFeatures(cpu_arch: Target.Cpu.Arch, os: Target.Os, cross_ta switch (builtin.os.tag) { .linux => return linux.detectNativeCpuAndFeatures(), .macos => return darwin.macos.detectNativeCpuAndFeatures(), + .windows => return windows.detectNativeCpuAndFeatures(), else => {}, } diff --git a/lib/std/zig/system/arm.zig b/lib/std/zig/system/arm.zig new file mode 100644 index 0000000000..b6f06206bc --- /dev/null +++ b/lib/std/zig/system/arm.zig @@ -0,0 +1,134 @@ +const std = @import("std"); + +pub const CoreInfo = struct { + architecture: u8 = 0, + implementer: u8 = 0, + variant: u8 = 0, + part: u16 = 0, +}; + +pub const cpu_models = struct { + // Shorthands to simplify the tables below. + const A32 = std.Target.arm.cpu; + const A64 = std.Target.aarch64.cpu; + + const E = struct { + part: u16, + variant: ?u8 = null, // null if matches any variant + m32: ?*const std.Target.Cpu.Model = null, + m64: ?*const std.Target.Cpu.Model = null, + }; + + // implementer = 0x41 + const ARM = [_]E{ + E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, + E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, + E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, + E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, + E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, + E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, + E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, + E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, + E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, + E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, + E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, 
+ E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, + E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, + E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, + E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, + E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, + E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, + E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, + E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, + E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, + E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, + E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, + E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, + E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, + E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, + E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, + E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, + E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, + E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, + E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, + E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, + E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, + E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, + E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, + E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, + E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, + // This is a guess based on https://www.notebookcheck.net/Qualcomm-Snapdragon-8cx-Gen-3-Processor-Benchmarks-and-Specs.652916.0.html + E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c }, + E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, + E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, + E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, + E{ .part = 0xd43, .m64 = 
&A64.cortex_a65ae }, + }; + // implementer = 0x42 + const Broadcom = [_]E{ + E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x43 + const Cavium = [_]E{ + E{ .part = 0x0a0, .m64 = &A64.thunderx }, + E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, + E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, + E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, + E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x46 + const Fujitsu = [_]E{ + E{ .part = 0x001, .m64 = &A64.a64fx }, + }; + // implementer = 0x48 + const HiSilicon = [_]E{ + E{ .part = 0xd01, .m64 = &A64.tsv110 }, + }; + // implementer = 0x4e + const Nvidia = [_]E{ + E{ .part = 0x004, .m64 = &A64.carmel }, + }; + // implementer = 0x50 + const Ampere = [_]E{ + E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, + E{ .part = 0x000, .m64 = &A64.xgene1 }, + }; + // implementer = 0x51 + const Qualcomm = [_]E{ + E{ .part = 0x06f, .m32 = &A32.krait }, + E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0xc00, .m64 = &A64.falkor }, + E{ .part = 0xc01, .m64 = &A64.saphira }, + }; + + pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const std.Target.Cpu.Model { + const models = switch (core.implementer) { + 0x41 => &ARM, + 0x42 => &Broadcom, + 0x43 => &Cavium, + 0x46 => &Fujitsu, + 0x48 => &HiSilicon, + 0x4e => &Nvidia, + 0x50 => &Ampere, + 0x51 => &Qualcomm, + else => return null, + }; + + for (models) |model| { + const variant_ok = if (model.variant) |v| v == core.variant else true; 
+ if (model.part == core.part and variant_ok) return if (is_64bit) model.m64 else model.m32; + } + + return null; + } +}; diff --git a/lib/std/zig/system/linux.zig b/lib/std/zig/system/linux.zig index 5e24490d82..7183f4ccc2 100644 --- a/lib/std/zig/system/linux.zig +++ b/lib/std/zig/system/linux.zig @@ -159,129 +159,7 @@ const ArmCpuinfoImpl = struct { is_really_v6: bool = false, }; - const cpu_models = struct { - // Shorthands to simplify the tables below. - const A32 = Target.arm.cpu; - const A64 = Target.aarch64.cpu; - - const E = struct { - part: u16, - variant: ?u8 = null, // null if matches any variant - m32: ?*const Target.Cpu.Model = null, - m64: ?*const Target.Cpu.Model = null, - }; - - // implementer = 0x41 - const ARM = [_]E{ - E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, - E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, - E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, - E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, - E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, - E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, - E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, - E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, - E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, - E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, - E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, - E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, - E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, - E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, - E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, - E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, - E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, - E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, - E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, - E{ .part = 0xc60, .m32 = 
&A32.cortex_m0plus, .m64 = null }, - E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, - E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, - E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, - E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, - E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, - E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, - E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, - E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, - E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, - E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, - E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, - E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, - E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, - E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, - E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, - E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, - E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, - E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, - E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, - E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, - }; - // implementer = 0x42 - const Broadcom = [_]E{ - E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x43 - const Cavium = [_]E{ - E{ .part = 0x0a0, .m64 = &A64.thunderx }, - E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, - E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, - E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, - E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x46 - const Fujitsu = [_]E{ - E{ .part = 0x001, .m64 = &A64.a64fx }, - }; - // implementer = 0x48 - const HiSilicon = [_]E{ - E{ .part = 0xd01, .m64 = &A64.tsv110 }, - }; - // implementer = 0x4e - const Nvidia = [_]E{ - E{ .part = 0x004, .m64 = &A64.carmel }, - }; 
- // implementer = 0x50 - const Ampere = [_]E{ - E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, - E{ .part = 0x000, .m64 = &A64.xgene1 }, - }; - // implementer = 0x51 - const Qualcomm = [_]E{ - E{ .part = 0x06f, .m32 = &A32.krait }, - E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, - E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, - E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0xc00, .m64 = &A64.falkor }, - E{ .part = 0xc01, .m64 = &A64.saphira }, - }; - - fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model { - const models = switch (core.implementer) { - 0x41 => &ARM, - 0x42 => &Broadcom, - 0x43 => &Cavium, - 0x46 => &Fujitsu, - 0x48 => &HiSilicon, - 0x50 => &Ampere, - 0x51 => &Qualcomm, - else => return null, - }; - - for (models) |model| { - if (model.part == core.part and - (model.variant == null or model.variant.? 
== core.variant)) - return if (is_64bit) model.m64 else model.m32; - } - - return null; - } - }; + const cpu_models = @import("arm.zig").cpu_models; fn addOne(self: *ArmCpuinfoImpl) void { if (self.have_fields == 4 and self.core_no < self.cores.len) { @@ -346,7 +224,12 @@ const ArmCpuinfoImpl = struct { var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined; for (self.cores[0..self.core_no]) |core, i| { - known_models[i] = cpu_models.isKnown(core, is_64bit); + known_models[i] = cpu_models.isKnown(.{ + .architecture = core.architecture, + .implementer = core.implementer, + .variant = core.variant, + .part = core.part, + }, is_64bit); } // XXX We pick the first core on big.LITTLE systems, hopefully the diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 595dac6278..f11905873d 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -1,6 +1,12 @@ const std = @import("std"); +const builtin = @import("builtin"); +const mem = std.mem; +const Target = std.Target; pub const WindowsVersion = std.Target.Os.WindowsVersion; +pub const PF = std.os.windows.PF; +pub const REG = std.os.windows.REG; +pub const IsProcessorFeaturePresent = std.os.windows.IsProcessorFeaturePresent; /// Returns the highest known WindowsVersion deduced from reported runtime information. /// Discards information about in-between versions we don't differentiate. @@ -38,3 +44,318 @@ pub fn detectRuntimeVersion() WindowsVersion { return @intToEnum(WindowsVersion, version); } + +// Technically, a registry value can be as long as 1MB. However, MS recommends storing +// values larger than 2048 bytes in a file rather than directly in the registry, and since we +// are only accessing a system hive \Registry\Machine, we stick to MS guidelines. 
+// https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits
+const max_value_len = 2048;
+
+/// A registry value to query: `key` is the value name and `value` is the `REG.*` type
+/// that selects how the result is stored and decoded by `getCpuInfoFromRegistry`.
+const RegistryPair = struct {
+    key: []const u8,
+    value: std.os.windows.ULONG,
+};
+
+/// Queries the values listed in `pairs` from the `CentralProcessor\<core>` registry subkey
+/// and stores the i-th result in `out_buf[i]`:
+/// * `REG.SZ`, `REG.EXPAND_SZ`, `REG.MULTI_SZ`: converted to UTF-8 and 0-terminated,
+/// * `REG.DWORD`, `REG.DWORD_BIG_ENDIAN`: the first 4 bytes, copied verbatim,
+/// * `REG.QWORD`: the first 8 bytes, copied verbatim.
+/// Any other type in `pairs` is a programmer error (`unreachable`).
+/// Returns `error.Unexpected` when the query does not report `.SUCCESS`; errors from
+/// formatting the core number or from the UTF-8/UTF-16 conversions are propagated.
+fn getCpuInfoFromRegistry(
+    core: usize,
+    comptime pairs_num: comptime_int,
+    comptime pairs: [pairs_num]RegistryPair,
+    out_buf: *[pairs_num][max_value_len]u8,
+) !void {
+    // Originally, I wanted to issue a single call with a more complex table structure such that we
+    // would sequentially visit each CPU#d subkey in the registry and pull the value of interest into
+    // a buffer, however, NT seems to be expecting a single buffer per each table meaning we would
+    // end up pulling only the last CPU core info, overwriting everything else.
+    // If anyone can come up with a solution to this, please do!
+    const table_size = 1 + pairs.len;
+    var table: [table_size + 1]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined;
+
+    const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor");
+
+    // The subkey name is the decimal core number, so at most `max_cpu_buf` digits are supported;
+    // a larger core number makes `bufPrint` fail and the error is returned to the caller.
+    const max_cpu_buf = 4;
+    var next_cpu_buf: [max_cpu_buf]u8 = undefined;
+    const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{core});
+
+    var subkey: [max_cpu_buf + 1]u16 = undefined;
+    const subkey_len = try std.unicode.utf8ToUtf16Le(&subkey, next_cpu);
+    subkey[subkey_len] = 0;
+
+    // First entry: select the per-core subkey that the following entries are read from.
+    table[0] = .{
+        .QueryRoutine = null,
+        .Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
+        .Name = subkey[0..subkey_len :0],
+        .EntryContext = null,
+        .DefaultType = REG.NONE,
+        .DefaultData = null,
+        .DefaultLength = 0,
+    };
+
+    // Storage that the query writes into through `EntryContext`. It is declared at function
+    // scope on purpose: the pointers placed in `table` are used by the call below and read
+    // back afterwards, so they must not refer to locals of an inner block that has already
+    // been left.
+    var unicode_strings: [pairs_num]std.os.windows.UNICODE_STRING = undefined;
+    var value_bufs: [pairs_num][max_value_len / 2]u16 = undefined;
+
+    inline for (pairs) |pair, i| {
+        const ctx: *anyopaque = blk: {
+            switch (pair.value) {
+                REG.SZ,
+                REG.EXPAND_SZ,
+                REG.MULTI_SZ,
+                => {
+                    // Strings are returned through a UNICODE_STRING header describing the buffer.
+                    unicode_strings[i] = .{
+                        .Length = 0,
+                        .MaximumLength = max_value_len,
+                        .Buffer = &value_bufs[i],
+                    };
+                    break :blk &unicode_strings[i];
+                },
+
+                // Fixed-size values are written straight to the start of the scratch buffer.
+                REG.DWORD,
+                REG.DWORD_BIG_ENDIAN,
+                REG.QWORD,
+                => break :blk &value_bufs[i],
+
+                else => unreachable,
+            }
+        };
+        const key_name = std.unicode.utf8ToUtf16LeStringLiteral(pair.key);
+
+        table[i + 1] = .{
+            .QueryRoutine = null,
+            .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
+            // The table field is a mutable pointer while the literal is const; the
+            // int round-trip only drops the `const` qualifier.
+            .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)),
+            .EntryContext = ctx,
+            .DefaultType = REG.NONE,
+            .DefaultData = null,
+            .DefaultLength = 0,
+        };
+    }
+
+    // Table sentinel
+    table[table_size] = .{
+        .QueryRoutine = null,
+        .Flags = 0,
+        .Name = null,
+        .EntryContext = null,
+        .DefaultType = 0,
+        .DefaultData = null,
+        .DefaultLength = 0,
+    };
+
+    const res = std.os.windows.ntdll.RtlQueryRegistryValues(
+        std.os.windows.RTL_REGISTRY_ABSOLUTE,
+        topkey,
+        &table,
+        null,
+        null,
+    );
+    switch (res) {
+        .SUCCESS => {
+            inline for (pairs) |pair, i| switch (pair.value) {
+                REG.NONE => unreachable,
+
+                REG.SZ,
+                REG.EXPAND_SZ,
+                REG.MULTI_SZ,
+                => {
+                    // `Length` is in bytes, hence the division by two to get UTF-16 code units.
+                    // NOTE(review): assumes the UTF-8 form plus the terminating 0 fits in
+                    // `max_value_len` bytes - confirm for values other than short ASCII strings.
+                    const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[i + 1].EntryContext);
+                    const len = try std.unicode.utf16leToUtf8(out_buf[i][0..], entry.Buffer[0 .. entry.Length / 2]);
+                    out_buf[i][len] = 0;
+                },
+
+                REG.DWORD,
+                REG.DWORD_BIG_ENDIAN,
+                REG.QWORD,
+                => {
+                    const entry = @ptrCast([*]align(1) const u8, table[i + 1].EntryContext);
+                    switch (pair.value) {
+                        REG.DWORD, REG.DWORD_BIG_ENDIAN => {
+                            mem.copy(u8, out_buf[i][0..4], entry[0..4]);
+                        },
+                        REG.QWORD => {
+                            mem.copy(u8, out_buf[i][0..8], entry[0..8]);
+                        },
+                        else => unreachable,
+                    }
+                },
+
+                else => unreachable,
+            };
+        },
+        else => return error.Unexpected,
+    }
+}
+
+/// Returns the processor count recorded in the Process Environment Block.
+fn getCpuCount() usize {
+    return std.os.windows.peb().NumberOfProcessors;
+}
+
+/// Collects the distinct Arm core descriptions found in the registry and maps them to a
+/// known CPU model.
+const ArmCpuInfoImpl = struct {
+    /// Distinct cores seen so far; only `cores[0..core_no]` is valid.
+    cores: [4]CoreInfo = undefined,
+    core_no: usize = 0,
+    /// Number of fields decoded for the core currently being parsed.
+    have_fields: usize = 0,
+
+    const CoreInfo = @import("arm.zig").CoreInfo;
+    const cpu_models = @import("arm.zig").cpu_models;
+
+    /// Raw registry data for one core.
+    const Data = struct {
+        /// Bytes of the `CP 4000` value; at least the first 4 bytes are read.
+        cp_4000: []const u8,
+        /// The `Identifier` string, searched for a `Family <n>` token pair.
+        identifier: []const u8,
+    };
+
+    /// Decodes one core's registry data and records it when all three fields (part,
+    /// implementer, architecture) were found and the core is not a duplicate.
+    /// Returns an error if the number following `Family` is not a decimal `u8`.
+    fn parseDataHook(self: *ArmCpuInfoImpl, data: Data) !void {
+        // Every slot already holds a distinct core: there is nowhere to decode into.
+        if (self.core_no >= self.cores.len) return;
+
+        const info = &self.cores[self.core_no];
+        info.* = .{};
+        // Fields are counted per core; without the reset only the very first core could
+        // ever satisfy the `have_fields == 3` check in `addOne`.
+        self.have_fields = 0;
+
+        // NOTE(review): the bit positions used below match the Arm MIDR_EL1 layout (PartNum in
+        // bits [15:4], Implementer in bits [31:24]) - confirm that `CP 4000` is MIDR_EL1.
+
+        // CPU part
+        info.part = mem.readIntLittle(u16, data.cp_4000[0..2]) >> 4;
+        self.have_fields += 1;
+
+        // CPU implementer
+        info.implementer = data.cp_4000[3];
+        self.have_fields += 1;
+
+        var tokens = mem.tokenize(u8, data.identifier, " ");
+        while (tokens.next()) |token| {
+            if (mem.eql(u8, "Family", token)) {
+                // CPU architecture
+                const family = tokens.next() orelse continue;
+                info.architecture = try std.fmt.parseInt(u8, family, 10);
+                self.have_fields += 1;
+                break;
+            }
+        } else return; // no usable `Family <n>` pair: drop this core
+
+        self.addOne();
+    }
+
+    /// Commits the core at `cores[core_no]` unless it is incomplete or equal to a core
+    /// that was recorded earlier.
+    fn addOne(self: *ArmCpuInfoImpl) void {
+        if (self.have_fields == 3 and self.core_no < self.cores.len) {
+            if (self.core_no > 0) {
+                // Deduplicate the core info.
+                for (self.cores[0..self.core_no]) |it| {
+                    if (std.meta.eql(it, self.cores[self.core_no]))
+                        return;
+                }
+            }
+            self.core_no += 1;
+        }
+    }
+
+    /// Returns the CPU for the first recorded core, or null when no core was recorded or
+    /// that core is not a known model.
+    fn finalize(self: ArmCpuInfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu {
+        if (self.core_no == 0) return null;
+
+        const is_64bit = switch (arch) {
+            .aarch64, .aarch64_be, .aarch64_32 => true,
+            else => false,
+        };
+
+        var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined;
+        for (self.cores[0..self.core_no]) |core, i| {
+            known_models[i] = cpu_models.isKnown(core, is_64bit);
+        }
+
+        // XXX We pick the first core on big.LITTLE systems, hopefully the
+        // LITTLE one.
+        const model = known_models[0] orelse return null;
+        return Target.Cpu{
+            .arch = arch,
+            .model = model,
+            .features = model.features,
+        };
+    }
+};
+
+const ArmCpuInfoParser = CpuInfoParser(ArmCpuInfoImpl);
+
+/// Builds a parser that feeds the `CP 4000` and `Identifier` registry values of every
+/// processor to `impl.parseDataHook` and then asks `impl.finalize` for the result.
+fn CpuInfoParser(comptime impl: anytype) type {
+    return struct {
+        /// Returns the detected CPU, null when `impl` could not identify it, or an error
+        /// when a registry query or the parsing of its result failed.
+        fn parse(arch: Target.Cpu.Arch) !?Target.Cpu {
+            var obj: impl = .{};
+            var out_buf: [2][max_value_len]u8 = undefined;
+
+            const cpu_count = getCpuCount();
+            var i: usize = 0;
+            while (i < cpu_count) : (i += 1) {
+                try getCpuInfoFromRegistry(i, 2, .{
+                    .{ .key = "CP 4000", .value = REG.QWORD },
+                    .{ .key = "Identifier", .value = REG.SZ },
+                }, &out_buf);
+
+                const cp_4000 = out_buf[0][0..8];
+                const identifier = mem.sliceTo(out_buf[1][0..], 0);
+
+                try obj.parseDataHook(.{
+                    .cp_4000 = cp_4000,
+                    .identifier = identifier,
+                });
+            }
+
+            return obj.finalize(arch);
+        }
+    };
+}
+
+/// Returns the generic model of `arch` with an empty feature set.
+fn genericCpu(comptime arch: Target.Cpu.Arch) Target.Cpu {
+    return .{
+        .arch = arch,
+        .model = Target.Cpu.Model.generic(arch),
+        .features = Target.Cpu.Feature.Set.empty,
+    };
+}
+
+/// Detects the CPU model and features of the machine this code runs on.
+/// On aarch64 the model comes from the registry (falling back to the generic model when
+/// it cannot be determined) and selected features are then overridden with what
+/// `IsProcessorFeaturePresent` reports. Returns null on every other architecture.
+pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
+    const current_arch = builtin.cpu.arch;
+    switch (current_arch) {
+        .aarch64, .aarch64_be, .aarch64_32 => {
+            var cpu = cpu: {
+                const maybe_cpu = ArmCpuInfoParser.parse(current_arch) catch break :cpu genericCpu(current_arch);
+                break :cpu maybe_cpu orelse genericCpu(current_arch);
+            };
+
+            const Feature = Target.aarch64.Feature;
+
+            // Override any features that are either present or absent
+            if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.neon));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.neon));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.crc));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.crc));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.crypto));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.crypto));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.lse));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.lse));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.dotprod));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.dotprod));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.jsconv));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.jsconv));
+            }
+
+            return cpu;
+        },
+        // No detection is implemented for other architectures. Returning explicitly keeps
+        // the function from falling off its end without a value.
+        else => return null,
+    }
+}