From 1eac3c57eca2e736275d0d5066f0ff431ae1f9e8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Nov 2022 14:28:38 +0100 Subject: [PATCH 01/15] windows: add processor feature enumeration --- lib/std/os/windows.zig | 104 ++++++++++++++++++++++++++++++++ lib/std/os/windows/kernel32.zig | 1 + 2 files changed, 105 insertions(+) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index ba9979cbb4..4e21bb26f1 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -3714,3 +3714,107 @@ pub const CTRL_LOGOFF_EVENT: DWORD = 5; pub const CTRL_SHUTDOWN_EVENT: DWORD = 6; pub const HANDLER_ROUTINE = std.meta.FnPtr(fn (dwCtrlType: DWORD) callconv(WINAPI) BOOL); + +/// The 64-bit load/store atomic instructions are available. +pub const PF_ARM_64BIT_LOADSTORE_ATOMIC = 25; + +/// The divide instruction_available. +pub const PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE = 24; + +/// The external cache is available. +pub const PF_ARM_EXTERNAL_CACHE_AVAILABLE = 26; + +/// The floating-point multiply-accumulate instruction is available. +pub const PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE = 27; + +/// The VFP/Neon: 32 x 64bit register bank is present. +/// This flag has the same meaning as PF_ARM_VFP_EXTENDED_REGISTERS. +pub const PF_ARM_VFP_32_REGISTERS_AVAILABLE = 18; + +/// The 3D-Now instruction is available. +pub const PF_3DNOW_INSTRUCTIONS_AVAILABLE = 7; + +/// The processor channels are enabled. +pub const PF_CHANNELS_ENABLED = 16; + +/// The atomic compare and exchange operation (cmpxchg) is available. +pub const PF_COMPARE_EXCHANGE_DOUBLE = 2; + +/// The atomic compare and exchange 128-bit operation (cmpxchg16b) is available. +pub const PF_COMPARE_EXCHANGE128 = 14; + +/// The atomic compare 64 and exchange 128-bit operation (cmp8xchg16) is available. +pub const PF_COMPARE64_EXCHANGE128 = 15; + +/// _fastfail() is available. +pub const PF_FASTFAIL_AVAILABLE = 23; + +/// Floating-point operations are emulated using software emulator. 
+/// This function returns a nonzero value if floating-point operations are emulated; otherwise, it returns zero. +pub const PF_FLOATING_POINT_EMULATED = 1; + +/// On a Pentium, a floating-point precision error can occur in rare circumstances. +pub const PF_FLOATING_POINT_PRECISION_ERRATA = 0; + +/// The MMX instruction set is available. +pub const PF_MMX_INSTRUCTIONS_AVAILABLE = 3; + +/// Data execution prevention is enabled. +pub const PF_NX_ENABLED = 12; + +/// The processor is PAE-enabled. +pub const PF_PAE_ENABLED = 9; + +/// The RDTSC instruction is available. +pub const PF_RDTSC_INSTRUCTION_AVAILABLE = 8; + +/// RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE instructions are available. +pub const PF_RDWRFSGBASE_AVAILABLE = 22; + +/// Second Level Address Translation is supported by the hardware. +pub const PF_SECOND_LEVEL_ADDRESS_TRANSLATION = 20; + +/// The SSE3 instruction set is available. +pub const PF_SSE3_INSTRUCTIONS_AVAILABLE = 13; + +/// The SSSE3 instruction set is available. +pub const PF_SSSE3_INSTRUCTIONS_AVAILABLE = 36; + +/// The SSE4_1 instruction set is available. +pub const PF_SSE4_1_INSTRUCTIONS_AVAILABLE = 37; + +/// The SSE4_2 instruction set is available. +pub const PF_SSE4_2_INSTRUCTIONS_AVAILABLE = 38; + +/// The AVX instruction set is available. +pub const PF_AVX_INSTRUCTIONS_AVAILABLE = 39; + +/// The AVX2 instruction set is available. +pub const PF_AVX2_INSTRUCTIONS_AVAILABLE = 40; + +/// The AVX512F instruction set is available. +pub const PF_AVX512F_INSTRUCTIONS_AVAILABLE = 41; + +/// Virtualization is enabled in the firmware and made available by the operating system. +pub const PF_VIRT_FIRMWARE_ENABLED = 21; + +/// The SSE instruction set is available. +pub const PF_XMMI_INSTRUCTIONS_AVAILABLE = 6; + +/// The SSE2 instruction set is available. +pub const PF_XMMI64_INSTRUCTIONS_AVAILABLE = 10; + +/// The processor implements the XSAVI and XRSTOR instructions. 
+pub const PF_XSAVE_ENABLED = 17; + +/// This ARM processor implements the ARM v8 instructions set. +pub const PF_ARM_V8_INSTRUCTIONS_AVAILABLE = 29; + +/// This ARM processor implements the ARM v8 extra cryptographic instructions (i.e., AES, SHA1 and SHA2). +pub const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE = 30; + +/// This ARM processor implements the ARM v8 extra CRC32 instructions. +pub const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31; + +/// This ARM processor implements the ARM v8.1 atomic instructions (e.g., CAS, SWP). +pub const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34; diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 8d146def7f..09510a5f91 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -231,6 +231,7 @@ pub extern "kernel32" fn GetQueuedCompletionStatusEx( pub extern "kernel32" fn GetSystemInfo(lpSystemInfo: *SYSTEM_INFO) callconv(WINAPI) void; pub extern "kernel32" fn GetSystemTimeAsFileTime(*FILETIME) callconv(WINAPI) void; +pub extern "kernel32" fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) BOOL; pub extern "kernel32" fn HeapCreate(flOptions: DWORD, dwInitialSize: SIZE_T, dwMaximumSize: SIZE_T) callconv(WINAPI) ?HANDLE; pub extern "kernel32" fn HeapDestroy(hHeap: HANDLE) callconv(WINAPI) BOOL; From 625415d7388deea1ee2d95fe5bbed28bf6a52d01 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Nov 2022 16:45:35 +0100 Subject: [PATCH 02/15] windows: add zig impl of IsProcessorFeaturePresent syscall --- lib/std/os/windows.zig | 316 ++++++++++++++++++++++++++++++++--------- 1 file changed, 246 insertions(+), 70 deletions(-) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 4e21bb26f1..12cfa20dba 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2103,6 +2103,7 @@ pub const USHORT = u16; pub const SHORT = i16; pub const ULONG = u32; pub const LONG = i32; +pub const ULONG64 = u64; pub const ULONGLONG = u64; pub const LONGLONG = i64; 
pub const HLOCAL = HANDLE; @@ -3715,106 +3716,281 @@ pub const CTRL_SHUTDOWN_EVENT: DWORD = 6; pub const HANDLER_ROUTINE = std.meta.FnPtr(fn (dwCtrlType: DWORD) callconv(WINAPI) BOOL); -/// The 64-bit load/store atomic instructions are available. -pub const PF_ARM_64BIT_LOADSTORE_ATOMIC = 25; +/// Processor feature enumeration. +pub const PF = enum(DWORD) { + /// On a Pentium, a floating-point precision error can occur in rare circumstances. + FLOATING_POINT_PRECISION_ERRATA = 0, -/// The divide instruction_available. -pub const PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE = 24; + /// Floating-point operations are emulated using software emulator. + /// This function returns a nonzero value if floating-point operations are emulated; otherwise, it returns zero. + FLOATING_POINT_EMULATED = 1, -/// The external cache is available. -pub const PF_ARM_EXTERNAL_CACHE_AVAILABLE = 26; + /// The atomic compare and exchange operation (cmpxchg) is available. + COMPARE_EXCHANGE_DOUBLE = 2, -/// The floating-point multiply-accumulate instruction is available. -pub const PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE = 27; + /// The MMX instruction set is available. + MMX_INSTRUCTIONS_AVAILABLE = 3, -/// The VFP/Neon: 32 x 64bit register bank is present. -/// This flag has the same meaning as PF_ARM_VFP_EXTENDED_REGISTERS. -pub const PF_ARM_VFP_32_REGISTERS_AVAILABLE = 18; + /// The SSE instruction set is available. + XMMI_INSTRUCTIONS_AVAILABLE = 6, -/// The 3D-Now instruction is available. -pub const PF_3DNOW_INSTRUCTIONS_AVAILABLE = 7; + /// The 3D-Now instruction is available. + @"3DNOW_INSTRUCTIONS_AVAILABLE" = 7, -/// The processor channels are enabled. -pub const PF_CHANNELS_ENABLED = 16; + /// The RDTSC instruction is available. + RDTSC_INSTRUCTION_AVAILABLE = 8, -/// The atomic compare and exchange operation (cmpxchg) is available. -pub const PF_COMPARE_EXCHANGE_DOUBLE = 2; + /// The processor is PAE-enabled. 
+ PAE_ENABLED = 9, -/// The atomic compare and exchange 128-bit operation (cmpxchg16b) is available. -pub const PF_COMPARE_EXCHANGE128 = 14; + /// The SSE2 instruction set is available. + XMMI64_INSTRUCTIONS_AVAILABLE = 10, -/// The atomic compare 64 and exchange 128-bit operation (cmp8xchg16) is available. -pub const PF_COMPARE64_EXCHANGE128 = 15; + /// Data execution prevention is enabled. + NX_ENABLED = 12, -/// _fastfail() is available. -pub const PF_FASTFAIL_AVAILABLE = 23; + /// The SSE3 instruction set is available. + SSE3_INSTRUCTIONS_AVAILABLE = 13, -/// Floating-point operations are emulated using software emulator. -/// This function returns a nonzero value if floating-point operations are emulated; otherwise, it returns zero. -pub const PF_FLOATING_POINT_EMULATED = 1; + /// The atomic compare and exchange 128-bit operation (cmpxchg16b) is available. + COMPARE_EXCHANGE128 = 14, -/// On a Pentium, a floating-point precision error can occur in rare circumstances. -pub const PF_FLOATING_POINT_PRECISION_ERRATA = 0; + /// The atomic compare 64 and exchange 128-bit operation (cmp8xchg16) is available. + COMPARE64_EXCHANGE128 = 15, -/// The MMX instruction set is available. -pub const PF_MMX_INSTRUCTIONS_AVAILABLE = 3; + /// The processor channels are enabled. + CHANNELS_ENABLED = 16, -/// Data execution prevention is enabled. -pub const PF_NX_ENABLED = 12; + /// The processor implements the XSAVI and XRSTOR instructions. + XSAVE_ENABLED = 17, -/// The processor is PAE-enabled. -pub const PF_PAE_ENABLED = 9; + /// The VFP/Neon: 32 x 64bit register bank is present. + /// This flag has the same meaning as PF_ARM_VFP_EXTENDED_REGISTERS. + ARM_VFP_32_REGISTERS_AVAILABLE = 18, -/// The RDTSC instruction is available. -pub const PF_RDTSC_INSTRUCTION_AVAILABLE = 8; + /// Second Level Address Translation is supported by the hardware. + SECOND_LEVEL_ADDRESS_TRANSLATION = 20, -/// RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE instructions are available. 
-pub const PF_RDWRFSGBASE_AVAILABLE = 22; + /// Virtualization is enabled in the firmware and made available by the operating system. + VIRT_FIRMWARE_ENABLED = 21, -/// Second Level Address Translation is supported by the hardware. -pub const PF_SECOND_LEVEL_ADDRESS_TRANSLATION = 20; + /// RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE instructions are available. + RDWRFSGBASE_AVAILABLE = 22, -/// The SSE3 instruction set is available. -pub const PF_SSE3_INSTRUCTIONS_AVAILABLE = 13; + /// _fastfail() is available. + FASTFAIL_AVAILABLE = 23, -/// The SSSE3 instruction set is available. -pub const PF_SSSE3_INSTRUCTIONS_AVAILABLE = 36; + /// The divide instruction_available. + ARM_DIVIDE_INSTRUCTION_AVAILABLE = 24, -/// The SSE4_1 instruction set is available. -pub const PF_SSE4_1_INSTRUCTIONS_AVAILABLE = 37; + /// The 64-bit load/store atomic instructions are available. + ARM_64BIT_LOADSTORE_ATOMIC = 25, -/// The SSE4_2 instruction set is available. -pub const PF_SSE4_2_INSTRUCTIONS_AVAILABLE = 38; + /// The external cache is available. + ARM_EXTERNAL_CACHE_AVAILABLE = 26, -/// The AVX instruction set is available. -pub const PF_AVX_INSTRUCTIONS_AVAILABLE = 39; + /// The floating-point multiply-accumulate instruction is available. + ARM_FMAC_INSTRUCTIONS_AVAILABLE = 27, -/// The AVX2 instruction set is available. -pub const PF_AVX2_INSTRUCTIONS_AVAILABLE = 40; + /// This ARM processor implements the ARM v8 instructions set. + ARM_V8_INSTRUCTIONS_AVAILABLE = 29, -/// The AVX512F instruction set is available. -pub const PF_AVX512F_INSTRUCTIONS_AVAILABLE = 41; + /// This ARM processor implements the ARM v8 extra cryptographic instructions (i.e., AES, SHA1 and SHA2). + ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE = 30, -/// Virtualization is enabled in the firmware and made available by the operating system. -pub const PF_VIRT_FIRMWARE_ENABLED = 21; + /// This ARM processor implements the ARM v8 extra CRC32 instructions. 
+ ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31, -/// The SSE instruction set is available. -pub const PF_XMMI_INSTRUCTIONS_AVAILABLE = 6; + /// This ARM processor implements the ARM v8.1 atomic instructions (e.g., CAS, SWP). + ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34, -/// The SSE2 instruction set is available. -pub const PF_XMMI64_INSTRUCTIONS_AVAILABLE = 10; + /// The SSSE3 instruction set is available. + SSSE3_INSTRUCTIONS_AVAILABLE = 36, -/// The processor implements the XSAVI and XRSTOR instructions. -pub const PF_XSAVE_ENABLED = 17; + /// The SSE4_1 instruction set is available. + SSE4_1_INSTRUCTIONS_AVAILABLE = 37, -/// This ARM processor implements the ARM v8 instructions set. -pub const PF_ARM_V8_INSTRUCTIONS_AVAILABLE = 29; + /// The SSE4_2 instruction set is available. + SSE4_2_INSTRUCTIONS_AVAILABLE = 38, -/// This ARM processor implements the ARM v8 extra cryptographic instructions (i.e., AES, SHA1 and SHA2). -pub const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE = 30; + /// The AVX instruction set is available. + AVX_INSTRUCTIONS_AVAILABLE = 39, -/// This ARM processor implements the ARM v8 extra CRC32 instructions. -pub const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31; + /// The AVX2 instruction set is available. + AVX2_INSTRUCTIONS_AVAILABLE = 40, -/// This ARM processor implements the ARM v8.1 atomic instructions (e.g., CAS, SWP). -pub const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34; + /// The AVX512F instruction set is available. 
+ AVX512F_INSTRUCTIONS_AVAILABLE = 41, +}; + +pub const MAX_WOW64_SHARED_ENTRIES = 16; +pub const PROCESSOR_FEATURE_MAX = 64; +pub const MAXIMUM_XSTATE_FEATURES = 64; + +pub const KSYSTEM_TIME = extern struct { + LowPart: ULONG, + High1Time: LONG, + High2Time: LONG, +}; + +pub const NT_PRODUCT_TYPE = enum(INT) { + NtProductWinNt = 1, + NtProductLanManNt, + NtProductServer, +}; + +pub const ALTERNATIVE_ARCHITECTURE_TYPE = enum(INT) { + StandardDesign, + NEC98x86, + EndAlternatives, +}; + +pub const XSTATE_FEATURE = extern struct { + Offset: ULONG, + Size: ULONG, +}; + +pub const XSTATE_CONFIGURATION = extern struct { + EnabledFeatures: ULONG64, + Size: ULONG, + OptimizedSave: ULONG, + Features: [MAXIMUM_XSTATE_FEATURES]XSTATE_FEATURE, +}; + +/// Shared Kernel User Data +pub const KUSER_SHARED_DATA = extern struct { + TickCountLowDeprecated: ULONG, + TickCountMultiplier: ULONG, + InterruptTime: KSYSTEM_TIME, + SystemTime: KSYSTEM_TIME, + TimeZoneBias: KSYSTEM_TIME, + ImageNumberLow: USHORT, + ImageNumberHigh: USHORT, + NtSystemRoot: [260]WCHAR, + MaxStackTraceDepth: ULONG, + CryptoExponent: ULONG, + TimeZoneId: ULONG, + LargePageMinimum: ULONG, + AitSamplingValue: ULONG, + AppCompatFlag: ULONG, + RNGSeedVersion: ULONGLONG, + GlobalValidationRunlevel: ULONG, + TimeZoneBiasStamp: LONG, + NtBuildNumber: ULONG, + NtProductType: NT_PRODUCT_TYPE, + ProductTypeIsValid: BOOLEAN, + Reserved0: [1]BOOLEAN, + NativeProcessorArchitecture: USHORT, + NtMajorVersion: ULONG, + NtMinorVersion: ULONG, + ProcessorFeatures: [PROCESSOR_FEATURE_MAX]BOOLEAN, + Reserved1: ULONG, + Reserved3: ULONG, + TimeSlip: ULONG, + AlternativeArchitecture: ALTERNATIVE_ARCHITECTURE_TYPE, + BootId: ULONG, + SystemExpirationDate: LARGE_INTEGER, + SuiteMaskY: ULONG, + KdDebuggerEnabled: BOOLEAN, + DummyUnion1: extern union { + MitigationPolicies: UCHAR, + Alt: packed struct { + NXSupportPolicy: u2, + SEHValidationPolicy: u2, + CurDirDevicesSkippedForDlls: u2, + Reserved: u2, + }, + }, + CyclesPerYield: 
USHORT, + ActiveConsoleId: ULONG, + DismountCount: ULONG, + ComPlusPackage: ULONG, + LastSystemRITEventTickCount: ULONG, + NumberOfPhysicalPages: ULONG, + SafeBootMode: BOOLEAN, + DummyUnion2: extern union { + VirtualizationFlags: UCHAR, + Alt: packed struct { + ArchStartedInEl2: u1, + QcSlIsSupported: u1, + SpareBits: u6, + }, + }, + Reserved12: [2]UCHAR, + DummyUnion3: extern union { + SharedDataFlags: ULONG, + Alt: packed struct { + DbgErrorPortPresent: u1, + DbgElevationEnabled: u1, + DbgVirtEnabled: u1, + DbgInstallerDetectEnabled: u1, + DbgLkgEnabled: u1, + DbgDynProcessorEnabled: u1, + DbgConsoleBrokerEnabled: u1, + DbgSecureBootEnabled: u1, + DbgMultiSessionSku: u1, + DbgMultiUsersInSessionSku: u1, + DbgStateSeparationEnabled: u1, + SpareBits: u21, + }, + }, + DataFlagsPad: [1]ULONG, + TestRetInstruction: ULONGLONG, + QpcFrequency: LONGLONG, + SystemCall: ULONG, + Reserved2: ULONG, + SystemCallPad: [2]ULONGLONG, + DummyUnion4: extern union { + TickCount: KSYSTEM_TIME, + TickCountQuad: ULONG64, + Alt: extern struct { + ReservedTickCountOverlay: [3]ULONG, + TickCountPad: [1]ULONG, + }, + }, + Cookie: ULONG, + CookiePad: [1]ULONG, + ConsoleSessionForegroundProcessId: LONGLONG, + TimeUpdateLock: ULONGLONG, + BaselineSystemTimeQpc: ULONGLONG, + BaselineInterruptTimeQpc: ULONGLONG, + QpcSystemTimeIncrement: ULONGLONG, + QpcInterruptTimeIncrement: ULONGLONG, + QpcSystemTimeIncrementShift: UCHAR, + QpcInterruptTimeIncrementShift: UCHAR, + UnparkedProcessorCount: USHORT, + EnclaveFeatureMask: [4]ULONG, + TelemetryCoverageRound: ULONG, + UserModeGlobalLogger: [16]USHORT, + ImageFileExecutionOptions: ULONG, + LangGenerationCount: ULONG, + Reserved4: ULONGLONG, + InterruptTimeBias: ULONGLONG, + QpcBias: ULONGLONG, + ActiveProcessorCount: ULONG, + ActiveGroupCount: UCHAR, + Reserved9: UCHAR, + DummyUnion5: extern union { + QpcData: USHORT, + Alt: extern struct { + QpcBypassEnabled: UCHAR, + QpcShift: UCHAR, + }, + }, + TimeZoneBiasEffectiveStart: LARGE_INTEGER, + 
TimeZoneBiasEffectiveEnd: LARGE_INTEGER, + XState: XSTATE_CONFIGURATION, + FeatureConfigurationChangeStamp: KSYSTEM_TIME, + Spare: ULONG, + UserPointerAuthMask: ULONG64, +}; + +/// Read-only user-mode address for the shared data. +/// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm +/// https://msrc-blog.microsoft.com/2022/04/05/randomizing-the-kuser_shared_data-structure-on-windows/ +pub const SharedUserData: *const KUSER_SHARED_DATA = @intToPtr(*const KUSER_SHARED_DATA, 0x7FFE0000); + +pub fn IsProcessorFeaturePresent(feature: PF) bool { + if (@enumToInt(feature) >= PROCESSOR_FEATURE_MAX) return false; + return SharedUserData.ProcessorFeatures[@enumToInt(feature)] == 1; +} From 6edf9127fe810a5f6493e61f123f9f25a2c3f7dd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Nov 2022 19:45:07 +0100 Subject: [PATCH 03/15] windows: add missing enum values for ProcessorFeatures enumeration --- lib/std/os/windows.zig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 12cfa20dba..5308a6fae2 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -3731,6 +3731,9 @@ pub const PF = enum(DWORD) { /// The MMX instruction set is available. MMX_INSTRUCTIONS_AVAILABLE = 3, + PPC_MOVEMEM_64BIT_OK = 4, + ALPHA_BYTE_INSTRUCTIONS = 5, + /// The SSE instruction set is available. XMMI_INSTRUCTIONS_AVAILABLE = 6, @@ -3746,6 +3749,8 @@ pub const PF = enum(DWORD) { /// The SSE2 instruction set is available. XMMI64_INSTRUCTIONS_AVAILABLE = 10, + SSE_DAZ_MODE_AVAILABLE = 11, + /// Data execution prevention is enabled. NX_ENABLED = 12, @@ -3768,6 +3773,9 @@ pub const PF = enum(DWORD) { /// This flag has the same meaning as PF_ARM_VFP_EXTENDED_REGISTERS. ARM_VFP_32_REGISTERS_AVAILABLE = 18, + /// This ARM processor implements the ARM v8 NEON instruction set. 
+ ARM_NEON_INSTRUCTIONS_AVAILABLE = 19, + /// Second Level Address Translation is supported by the hardware. SECOND_LEVEL_ADDRESS_TRANSLATION = 20, @@ -3792,6 +3800,8 @@ pub const PF = enum(DWORD) { /// The floating-point multiply-accumulate instruction is available. ARM_FMAC_INSTRUCTIONS_AVAILABLE = 27, + RDRAND_INSTRUCTION_AVAILABLE = 28, + /// This ARM processor implements the ARM v8 instructions set. ARM_V8_INSTRUCTIONS_AVAILABLE = 29, @@ -3801,9 +3811,14 @@ pub const PF = enum(DWORD) { /// This ARM processor implements the ARM v8 extra CRC32 instructions. ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31, + RDTSCP_INSTRUCTION_AVAILABLE = 32, + RDPID_INSTRUCTION_AVAILABLE = 33, + /// This ARM processor implements the ARM v8.1 atomic instructions (e.g., CAS, SWP). ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34, + MONITORX_INSTRUCTION_AVAILABLE = 35, + /// The SSSE3 instruction set is available. SSSE3_INSTRUCTIONS_AVAILABLE = 36, @@ -3821,6 +3836,10 @@ pub const PF = enum(DWORD) { /// The AVX512F instruction set is available. 
AVX512F_INSTRUCTIONS_AVAILABLE = 41, + + ERMS_AVAILABLE = 42, + ARM_V82_DP_INSTRUCTIONS_AVAILABLE = 43, + ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44, }; pub const MAX_WOW64_SHARED_ENTRIES = 16; From 29aafdcd5520e15d2d395d616f2aee6ed9ba4648 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Nov 2022 19:45:38 +0100 Subject: [PATCH 04/15] windows: detect native CPU features for aarch64 SoCs --- lib/std/zig/system/NativeTargetInfo.zig | 1 + lib/std/zig/system/windows.zig | 41 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 09b863cdf7..0232797387 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -978,6 +978,7 @@ fn detectNativeCpuAndFeatures(cpu_arch: Target.Cpu.Arch, os: Target.Os, cross_ta switch (builtin.os.tag) { .linux => return linux.detectNativeCpuAndFeatures(), .macos => return darwin.macos.detectNativeCpuAndFeatures(), + .windows => return windows.detectNativeCpuAndFeatures(), else => {}, } diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 595dac6278..eb62068120 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -1,6 +1,10 @@ const std = @import("std"); +const builtin = @import("builtin"); +const Target = std.Target; pub const WindowsVersion = std.Target.Os.WindowsVersion; +pub const PF = std.os.windows.PF; +pub const IsProcessorFeaturePresent = std.os.windows.IsProcessorFeaturePresent; /// Returns the highest known WindowsVersion deduced from reported runtime information. /// Discards information about in-between versions we don't differentiate. 
@@ -38,3 +42,40 @@ pub fn detectRuntimeVersion() WindowsVersion { return @intToEnum(WindowsVersion, version); } + +fn detectNativeCpuAndFeaturesArm64() Target.Cpu { + const Feature = Target.aarch64.Feature; + + var cpu = Target.Cpu{ + .arch = .aarch64, + .model = Target.Cpu.Model.generic(.aarch64), + .features = Target.Cpu.Feature.Set.empty, + }; + + if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.neon)); + } + if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crc)); + } + if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crypto)); + } + + return cpu; +} + +fn getCpuCount() usize { + return std.os.windows.peb().NumberOfProcessors; +} + +pub fn detectNativeCpuAndFeatures() ?Target.Cpu { + switch (builtin.cpu.arch) { + .aarch64 => return detectNativeCpuAndFeaturesArm64(), + else => |arch| return .{ + .arch = arch, + .model = Target.Cpu.Model.generic(arch), + .features = Target.Cpu.Feature.Set.empty, + }, + } +} From e74b6f0a4bb86d5186ccc47ffd096f2ebfb29bc1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Nov 2022 20:09:21 +0100 Subject: [PATCH 05/15] windows: detect couple more aarch64 CPU features --- lib/std/os/windows.zig | 4 ++++ lib/std/zig/system/windows.zig | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 5308a6fae2..b6d61e324e 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -3838,7 +3838,11 @@ pub const PF = enum(DWORD) { AVX512F_INSTRUCTIONS_AVAILABLE = 41, ERMS_AVAILABLE = 42, + + /// This ARM processor implements the ARM v8.2 Dot Product (DP) instructions. ARM_V82_DP_INSTRUCTIONS_AVAILABLE = 43, + + /// This ARM processor implements the ARM v8.3 JavaScript conversion (JSCVT) instructions. 
ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44, }; diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index eb62068120..569daf0a30 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -61,6 +61,15 @@ fn detectNativeCpuAndFeaturesArm64() Target.Cpu { if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { cpu.features.addFeature(@enumToInt(Feature.crypto)); } + if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.lse)); + } + if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.dotprod)); + } + if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.jsconv)); + } return cpu; } From 153afed877e6f9a068d5db61f42cdb872c72aeab Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Nov 2022 22:26:19 +0100 Subject: [PATCH 06/15] windows: impl some primitives for getting registry keys --- lib/std/os/windows.zig | 189 ++++++++++++++++++++++++++++++++++- lib/std/os/windows/ntdll.zig | 6 ++ lib/std/target/aarch64.zig | 15 +++ 3 files changed, 209 insertions(+), 1 deletion(-) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index b6d61e324e..fc42dd381c 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2504,6 +2504,7 @@ pub const STANDARD_RIGHTS_READ = READ_CONTROL; pub const STANDARD_RIGHTS_WRITE = READ_CONTROL; pub const STANDARD_RIGHTS_EXECUTE = READ_CONTROL; pub const STANDARD_RIGHTS_REQUIRED = DELETE | READ_CONTROL | WRITE_DAC | WRITE_OWNER; +pub const MAXIMUM_ALLOWED = 0x02000000; // disposition for NtCreateFile pub const FILE_SUPERSEDE = 0; @@ -2872,9 +2873,11 @@ pub const PROV_RSA_FULL = 1; pub const REGSAM = ACCESS_MASK; pub const ACCESS_MASK = DWORD; -pub const HKEY = *opaque {}; pub const LSTATUS = LONG; +pub const HKEY = HANDLE; +pub const HKEY_LOCAL_MACHINE: HKEY = 
@intToPtr(HKEY, 0x80000002); + pub const FILE_NOTIFY_INFORMATION = extern struct { NextEntryOffset: DWORD, Action: DWORD, @@ -4017,3 +4020,187 @@ pub fn IsProcessorFeaturePresent(feature: PF) bool { if (@enumToInt(feature) >= PROCESSOR_FEATURE_MAX) return false; return SharedUserData.ProcessorFeatures[@enumToInt(feature)] == 1; } + +pub const KEY_QUERY_VALUE = 0x0001; + +/// Open symbolic link. +pub const REG_OPTION_OPEN_LINK: DWORD = 0x8; + +inline fn IsPredefKey(hkey: HKEY) bool { + return @ptrToInt(hkey) & 0xF0000000 == 0x80000000; +} + +inline fn GetPredefKeyIndex(hkey: HKEY) usize { + return @ptrToInt(hkey) & 0x0FFFFFFF; +} + +inline fn ClosePredefKey(hkey: HKEY) void { + if (@ptrToInt(hkey) & 0x1 != 0) { + assert(ntdll.NtClose(hkey) == .SUCCESS); + } +} + +const MAX_DEFAULT_HANDLES = 6; +pub const REG_MAX_NAME_SIZE = 256; + +pub const RegOpenKeyOpts = struct { + ulOptions: DWORD = 0, + samDesired: ACCESS_MASK = KEY_QUERY_VALUE, +}; + +/// Pulls existing key from the registry. +pub fn RegOpenKey(hkey: HKEY, lpSubKey: []const u16, opts: RegOpenKeyOpts) !HKEY { + if (IsPredefKey(hkey) and lpSubKey.len == 0) { + return hkey; + } + + const key_handle = try MapDefaultKey(hkey); + defer ClosePredefKey(key_handle); + + var subkey_string: UNICODE_STRING = undefined; + if (lpSubKey.len == 0 or mem.eql(u16, &[_]u16{'\\'}, lpSubKey)) { + subkey_string = .{ + .Length = 0, + .MaximumLength = 0, + .Buffer = @intToPtr([*]u16, @ptrToInt(&[0]u16{})), + }; + } else { + const len_bytes = math.cast(u16, lpSubKey.len * 2) orelse return error.NameTooLong; + subkey_string = .{ + .Length = len_bytes, + .MaximumLength = len_bytes, + .Buffer = @intToPtr([*]u16, @ptrToInt(lpSubKey.ptr)), + }; + } + + var attributes: ULONG = OBJ_CASE_INSENSITIVE; + if (opts.ulOptions & REG_OPTION_OPEN_LINK != 0) { + attributes |= OBJ_OPENLINK; + } + + var attr = OBJECT_ATTRIBUTES{ + .Length = @sizeOf(OBJECT_ATTRIBUTES), + .RootDirectory = key_handle, + .Attributes = attributes, + .ObjectName = 
&subkey_string, + .SecurityDescriptor = null, + .SecurityQualityOfService = null, + }; + + var result: HKEY = undefined; + const rc = ntdll.NtOpenKey( + &result, + opts.samDesired, + attr, + ); + switch (rc) { + .SUCCESS => return result, + else => return unexpectedStatus(rc), + } +} + +pub fn RegCloseKey(hkey: HKEY) void { + if (IsPredefKey(hkey)) return; + assert(ntdll.NtClose(hkey) == .SUCCESS); +} + +extern var DefaultHandleHKUDisabled: BOOLEAN; +extern var DefaultHandlesDisabled: BOOLEAN; +extern var DefaultHandleTable: [MAX_DEFAULT_HANDLES]?HANDLE; + +fn MapDefaultKey(key: HKEY) !HANDLE { + if (!IsPredefKey(key)) return @intToPtr(HANDLE, @ptrToInt(key) & ~@as(usize, 0x1)); + + const index = GetPredefKeyIndex(key); + if (index >= MAX_DEFAULT_HANDLES) { + return error.InvalidParameter; + } + + const def_disabled = if (key == HKEY_LOCAL_MACHINE) DefaultHandleHKUDisabled else DefaultHandlesDisabled; + + var handle: HANDLE = undefined; + var do_open: bool = true; + + if (def_disabled != 0) { + const tmp = DefaultHandleTable[index]; + if (tmp) |h| { + do_open = false; + handle = h; + } + } + + if (do_open) { + handle = try OpenPredefinedKey(index); + } + + if (def_disabled == 0) { + handle = @intToPtr(HANDLE, @ptrToInt(handle) | 0x1); + } + + return handle; +} + +fn OpenPredefinedKey(index: usize) !HANDLE { + switch (index) { + 0 => { + // HKEY_CLASSES_ROOT + return error.Unimplemented; + }, + 1 => { + // HKEY_CURRENT_USER + return error.Unimplemented; + }, + 2 => { + // HKEY_LOCAL_MACHINE + return OpenLocalMachineKey(); + }, + 3 => { + // HKEY_USERS + return error.Unimplemented; + }, + 5 => { + // HKEY_CURRENT_CONFIG + return error.Unimplemented; + }, + 6 => { + // HKEY_DYN_DATA + return error.Unimplemented; + }, + else => { + return error.InvalidParameter; + }, + } +} + +fn OpenLocalMachineKey() !HANDLE { + const path = "\\Registry\\Machine"; + var path_u16: [REG_MAX_NAME_SIZE]u16 = undefined; + const path_len_u16 = try std.unicode.utf8ToUtf16Le(&path_u16, path); 
+ const path_len_bytes = @intCast(u16, path_len_u16 * 2); + + var key_name = UNICODE_STRING{ + .Length = path_len_bytes, + .MaximumLength = path_len_bytes, + .Buffer = @intToPtr([*]u16, @ptrToInt(&path_u16)), + }; + + var attr = OBJECT_ATTRIBUTES{ + .Length = @sizeOf(OBJECT_ATTRIBUTES), + .RootDirectory = null, + .Attributes = OBJ_CASE_INSENSITIVE, + .ObjectName = &key_name, + .SecurityDescriptor = null, + .SecurityQualityOfService = null, + }; + + var result: HKEY = undefined; + const rc = ntdll.NtOpenKey( + &result, + MAXIMUM_ALLOWED, + attr, + ); + switch (rc) { + .SUCCESS => return result, + else => return unexpectedStatus(rc), + } +} diff --git a/lib/std/os/windows/ntdll.zig b/lib/std/os/windows/ntdll.zig index bf9dc9bd2f..4932a6f679 100644 --- a/lib/std/os/windows/ntdll.zig +++ b/lib/std/os/windows/ntdll.zig @@ -253,3 +253,9 @@ pub extern "ntdll" fn NtUnlockFile( Length: *const LARGE_INTEGER, Key: ?*ULONG, ) callconv(WINAPI) NTSTATUS; + +pub extern "ntdll" fn NtOpenKey( + KeyHandle: *HANDLE, + DesiredAccess: ACCESS_MASK, + ObjectAttributes: OBJECT_ATTRIBUTES, +) callconv(WINAPI) NTSTATUS; diff --git a/lib/std/target/aarch64.zig b/lib/std/target/aarch64.zig index af50c9d890..2fd0d337e3 100644 --- a/lib/std/target/aarch64.zig +++ b/lib/std/target/aarch64.zig @@ -2252,4 +2252,19 @@ pub const cpu = struct { .v8a, }), }; + + pub const microsoft_sq3 = CpuModel{ + .name = "microsoft_sq3", + .llvm_name = "generic", + .features = featureSet(&[_]Feature{ + .aes, + .crc, + .crypto, + .dotprod, + .fp_armv8, + .lse, + .neon, + .sha2, + }), + }; }; From 7ea2c7fbcdc4c0555dcc45aebc1eb8e1bf4793b3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Nov 2022 17:34:45 +0100 Subject: [PATCH 07/15] windows: use RtlQueryRegistryValues to query reg in a single syscall --- lib/std/os/windows.zig | 311 +++++++++++++------------------- lib/std/os/windows/kernel32.zig | 11 ++ lib/std/os/windows/ntdll.zig | 10 + lib/std/zig/system/windows.zig | 115 +++++++++++- 4 files changed, 261 
insertions(+), 186 deletions(-) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index fc42dd381c..4111a82707 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2088,6 +2088,7 @@ pub const LPWSTR = [*:0]WCHAR; pub const LPCWSTR = [*:0]const WCHAR; pub const PVOID = *anyopaque; pub const PWSTR = [*:0]WCHAR; +pub const PCWSTR = [*:0]const WCHAR; pub const SIZE_T = usize; pub const UINT = c_uint; pub const ULONG_PTR = usize; @@ -2876,8 +2877,134 @@ pub const ACCESS_MASK = DWORD; pub const LSTATUS = LONG; pub const HKEY = HANDLE; + pub const HKEY_LOCAL_MACHINE: HKEY = @intToPtr(HKEY, 0x80000002); +/// Combines the STANDARD_RIGHTS_REQUIRED, KEY_QUERY_VALUE, KEY_SET_VALUE, KEY_CREATE_SUB_KEY, +/// KEY_ENUMERATE_SUB_KEYS, KEY_NOTIFY, and KEY_CREATE_LINK access rights. +pub const KEY_ALL_ACCESS = 0xF003F; +/// Reserved for system use. +pub const KEY_CREATE_LINK = 0x0020; +/// Required to create a subkey of a registry key. +pub const KEY_CREATE_SUB_KEY = 0x0004; +/// Required to enumerate the subkeys of a registry key. +pub const KEY_ENUMERATE_SUB_KEYS = 0x0008; +/// Equivalent to KEY_READ. +pub const KEY_EXECUTE = 0x20019; +/// Required to request change notifications for a registry key or for subkeys of a registry key. +pub const KEY_NOTIFY = 0x0010; +/// Required to query the values of a registry key. +pub const KEY_QUERY_VALUE = 0x0001; +/// Combines the STANDARD_RIGHTS_READ, KEY_QUERY_VALUE, KEY_ENUMERATE_SUB_KEYS, and KEY_NOTIFY values. +pub const KEY_READ = 0x20019; +/// Required to create, delete, or set a registry value. +pub const KEY_SET_VALUE = 0x0002; +/// Indicates that an application on 64-bit Windows should operate on the 32-bit registry view. +/// This flag is ignored by 32-bit Windows. +pub const KEY_WOW64_32KEY = 0x0200; +/// Indicates that an application on 64-bit Windows should operate on the 64-bit registry view. +/// This flag is ignored by 32-bit Windows. 
+pub const KEY_WOW64_64KEY = 0x0100; +/// Combines the STANDARD_RIGHTS_WRITE, KEY_SET_VALUE, and KEY_CREATE_SUB_KEY access rights. +pub const KEY_WRITE = 0x20006; + +/// Open symbolic link. +pub const REG_OPTION_OPEN_LINK: DWORD = 0x8; + +pub const RTL_QUERY_REGISTRY_TABLE = extern struct { + QueryRoutine: RTL_QUERY_REGISTRY_ROUTINE, + Flags: ULONG, + Name: ?PWSTR, + EntryContext: ?*anyopaque, + DefaultType: ULONG, + DefaultData: ?*anyopaque, + DefaultLength: ULONG, +}; + +pub const RTL_QUERY_REGISTRY_ROUTINE = ?std.meta.FnPtr(fn ( + PWSTR, + ULONG, + ?*anyopaque, + ULONG, + ?*anyopaque, + ?*anyopaque, +) callconv(WINAPI) NTSTATUS); + +/// Path is a full path +pub const RTL_REGISTRY_ABSOLUTE = 0; +/// \Registry\Machine\System\CurrentControlSet\Services +pub const RTL_REGISTRY_SERVICES = 1; +/// \Registry\Machine\System\CurrentControlSet\Control +pub const RTL_REGISTRY_CONTROL = 2; +/// \Registry\Machine\Software\Microsoft\Windows NT\CurrentVersion +pub const RTL_REGISTRY_WINDOWS_NT = 3; +/// \Registry\Machine\Hardware\DeviceMap +pub const RTL_REGISTRY_DEVICEMAP = 4; +/// \Registry\User\CurrentUser +pub const RTL_REGISTRY_USER = 5; +pub const RTL_REGISTRY_MAXIMUM = 6; + +/// Low order bits are registry handle +pub const RTL_REGISTRY_HANDLE = 0x40000000; +/// Indicates the key node is optional +pub const RTL_REGISTRY_OPTIONAL = 0x80000000; + +/// Name is a subkey and remainder of table or until next subkey are value +/// names for that subkey to look at. +pub const RTL_QUERY_REGISTRY_SUBKEY = 0x00000001; + +/// Reset current key to original key for this and all following table entries. +pub const RTL_QUERY_REGISTRY_TOPKEY = 0x00000002; + +/// Fail if no match found for this table entry. +pub const RTL_QUERY_REGISTRY_REQUIRED = 0x00000004; + +/// Used to mark a table entry that has no value name, just wants a call out, not +/// an enumeration of all values. 
+pub const RTL_QUERY_REGISTRY_NOVALUE = 0x00000008; + +/// Used to suppress the expansion of REG_MULTI_SZ into multiple callouts or +/// to prevent the expansion of environment variable values in REG_EXPAND_SZ. +pub const RTL_QUERY_REGISTRY_NOEXPAND = 0x00000010; + +/// QueryRoutine field ignored. EntryContext field points to location to store value. +/// For null terminated strings, EntryContext points to UNICODE_STRING structure that +/// describes maximum size of buffer. If .Buffer field is NULL then a buffer is +/// allocated. +pub const RTL_QUERY_REGISTRY_DIRECT = 0x00000020; + +/// Used to delete value keys after they are queried. +pub const RTL_QUERY_REGISTRY_DELETE = 0x00000040; + +/// Use this flag with the RTL_QUERY_REGISTRY_DIRECT flag to verify that the REG_XXX type +/// of the stored registry value matches the type expected by the caller. +/// If the types do not match, the call fails. +pub const RTL_QUERY_REGISTRY_TYPECHECK = 0x00000100; + +/// No value type +pub const REG_NONE = 0; +/// Unicode nul terminated string +pub const REG_SZ = 1; +/// Unicode nul terminated string (with environment variable references) +pub const REG_EXPAND_SZ = 2; +/// Free form binary +pub const REG_BINARY = 3; +/// 32-bit number +pub const REG_DWORD = 4; +/// 32-bit number (same as REG_DWORD) +pub const REG_DWORD_LITTLE_ENDIAN = 4; +/// 32-bit number +pub const REG_DWORD_BIG_ENDIAN = 5; +/// Symbolic Link (unicode) +pub const REG_LINK = 6; +/// Multiple Unicode strings +pub const REG_MULTI_SZ = 7; +/// Resource list in the resource map +pub const REG_RESOURCE_LIST = 8; +/// Resource list in the hardware description +pub const REG_FULL_RESOURCE_DESCRIPTOR = 9; +pub const REG_RESOURCE_REQUIREMENTS_LIST = 10; + pub const FILE_NOTIFY_INFORMATION = extern struct { NextEntryOffset: DWORD, Action: DWORD, @@ -4020,187 +4147,3 @@ pub fn IsProcessorFeaturePresent(feature: PF) bool { if (@enumToInt(feature) >= PROCESSOR_FEATURE_MAX) return false; return
SharedUserData.ProcessorFeatures[@enumToInt(feature)] == 1; } - -pub const KEY_QUERY_VALUE = 0x0001; - -/// Open symbolic link. -pub const REG_OPTION_OPEN_LINK: DWORD = 0x8; - -inline fn IsPredefKey(hkey: HKEY) bool { - return @ptrToInt(hkey) & 0xF0000000 == 0x80000000; -} - -inline fn GetPredefKeyIndex(hkey: HKEY) usize { - return @ptrToInt(hkey) & 0x0FFFFFFF; -} - -inline fn ClosePredefKey(hkey: HKEY) void { - if (@ptrToInt(hkey) & 0x1 != 0) { - assert(ntdll.NtClose(hkey) == .SUCCESS); - } -} - -const MAX_DEFAULT_HANDLES = 6; -pub const REG_MAX_NAME_SIZE = 256; - -pub const RegOpenKeyOpts = struct { - ulOptions: DWORD = 0, - samDesired: ACCESS_MASK = KEY_QUERY_VALUE, -}; - -/// Pulls existing key from the registry. -pub fn RegOpenKey(hkey: HKEY, lpSubKey: []const u16, opts: RegOpenKeyOpts) !HKEY { - if (IsPredefKey(hkey) and lpSubKey.len == 0) { - return hkey; - } - - const key_handle = try MapDefaultKey(hkey); - defer ClosePredefKey(key_handle); - - var subkey_string: UNICODE_STRING = undefined; - if (lpSubKey.len == 0 or mem.eql(u16, &[_]u16{'\\'}, lpSubKey)) { - subkey_string = .{ - .Length = 0, - .MaximumLength = 0, - .Buffer = @intToPtr([*]u16, @ptrToInt(&[0]u16{})), - }; - } else { - const len_bytes = math.cast(u16, lpSubKey.len * 2) orelse return error.NameTooLong; - subkey_string = .{ - .Length = len_bytes, - .MaximumLength = len_bytes, - .Buffer = @intToPtr([*]u16, @ptrToInt(lpSubKey.ptr)), - }; - } - - var attributes: ULONG = OBJ_CASE_INSENSITIVE; - if (opts.ulOptions & REG_OPTION_OPEN_LINK != 0) { - attributes |= OBJ_OPENLINK; - } - - var attr = OBJECT_ATTRIBUTES{ - .Length = @sizeOf(OBJECT_ATTRIBUTES), - .RootDirectory = key_handle, - .Attributes = attributes, - .ObjectName = &subkey_string, - .SecurityDescriptor = null, - .SecurityQualityOfService = null, - }; - - var result: HKEY = undefined; - const rc = ntdll.NtOpenKey( - &result, - opts.samDesired, - attr, - ); - switch (rc) { - .SUCCESS => return result, - else => return unexpectedStatus(rc), - 
} -} - -pub fn RegCloseKey(hkey: HKEY) void { - if (IsPredefKey(hkey)) return; - assert(ntdll.NtClose(hkey) == .SUCCESS); -} - -extern var DefaultHandleHKUDisabled: BOOLEAN; -extern var DefaultHandlesDisabled: BOOLEAN; -extern var DefaultHandleTable: [MAX_DEFAULT_HANDLES]?HANDLE; - -fn MapDefaultKey(key: HKEY) !HANDLE { - if (!IsPredefKey(key)) return @intToPtr(HANDLE, @ptrToInt(key) & ~@as(usize, 0x1)); - - const index = GetPredefKeyIndex(key); - if (index >= MAX_DEFAULT_HANDLES) { - return error.InvalidParameter; - } - - const def_disabled = if (key == HKEY_LOCAL_MACHINE) DefaultHandleHKUDisabled else DefaultHandlesDisabled; - - var handle: HANDLE = undefined; - var do_open: bool = true; - - if (def_disabled != 0) { - const tmp = DefaultHandleTable[index]; - if (tmp) |h| { - do_open = false; - handle = h; - } - } - - if (do_open) { - handle = try OpenPredefinedKey(index); - } - - if (def_disabled == 0) { - handle = @intToPtr(HANDLE, @ptrToInt(handle) | 0x1); - } - - return handle; -} - -fn OpenPredefinedKey(index: usize) !HANDLE { - switch (index) { - 0 => { - // HKEY_CLASSES_ROOT - return error.Unimplemented; - }, - 1 => { - // HKEY_CURRENT_USER - return error.Unimplemented; - }, - 2 => { - // HKEY_LOCAL_MACHINE - return OpenLocalMachineKey(); - }, - 3 => { - // HKEY_USERS - return error.Unimplemented; - }, - 5 => { - // HKEY_CURRENT_CONFIG - return error.Unimplemented; - }, - 6 => { - // HKEY_DYN_DATA - return error.Unimplemented; - }, - else => { - return error.InvalidParameter; - }, - } -} - -fn OpenLocalMachineKey() !HANDLE { - const path = "\\Registry\\Machine"; - var path_u16: [REG_MAX_NAME_SIZE]u16 = undefined; - const path_len_u16 = try std.unicode.utf8ToUtf16Le(&path_u16, path); - const path_len_bytes = @intCast(u16, path_len_u16 * 2); - - var key_name = UNICODE_STRING{ - .Length = path_len_bytes, - .MaximumLength = path_len_bytes, - .Buffer = @intToPtr([*]u16, @ptrToInt(&path_u16)), - }; - - var attr = OBJECT_ATTRIBUTES{ - .Length = 
@sizeOf(OBJECT_ATTRIBUTES), - .RootDirectory = null, - .Attributes = OBJ_CASE_INSENSITIVE, - .ObjectName = &key_name, - .SecurityDescriptor = null, - .SecurityQualityOfService = null, - }; - - var result: HKEY = undefined; - const rc = ntdll.NtOpenKey( - &result, - MAXIMUM_ALLOWED, - attr, - ); - switch (rc) { - .SUCCESS => return result, - else => return unexpectedStatus(rc), - } -} diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 09510a5f91..eeda2f63b6 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -10,6 +10,7 @@ const DWORD = windows.DWORD; const FILE_INFO_BY_HANDLE_CLASS = windows.FILE_INFO_BY_HANDLE_CLASS; const HANDLE = windows.HANDLE; const HMODULE = windows.HMODULE; +const HKEY = windows.HKEY; const HRESULT = windows.HRESULT; const LARGE_INTEGER = windows.LARGE_INTEGER; const LPCWSTR = windows.LPCWSTR; @@ -57,6 +58,8 @@ const UCHAR = windows.UCHAR; const FARPROC = windows.FARPROC; const INIT_ONCE_FN = windows.INIT_ONCE_FN; const PMEMORY_BASIC_INFORMATION = windows.PMEMORY_BASIC_INFORMATION; +const REGSAM = windows.REGSAM; +const LSTATUS = windows.LSTATUS; pub extern "kernel32" fn AddVectoredExceptionHandler(First: c_ulong, Handler: ?VECTORED_EXCEPTION_HANDLER) callconv(WINAPI) ?*anyopaque; pub extern "kernel32" fn RemoveVectoredExceptionHandler(Handle: HANDLE) callconv(WINAPI) c_ulong; @@ -412,3 +415,11 @@ pub extern "kernel32" fn SleepConditionVariableSRW( pub extern "kernel32" fn TryAcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) BOOLEAN; pub extern "kernel32" fn AcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void; pub extern "kernel32" fn ReleaseSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void; + +pub extern "kernel32" fn RegOpenKeyExW( + hkey: HKEY, + lpSubKey: LPCWSTR, + ulOptions: DWORD, + samDesired: REGSAM, + phkResult: *HANDLE, +) callconv(WINAPI) LSTATUS; diff --git a/lib/std/os/windows/ntdll.zig b/lib/std/os/windows/ntdll.zig index 4932a6f679..b006a785da 
100644 --- a/lib/std/os/windows/ntdll.zig +++ b/lib/std/os/windows/ntdll.zig @@ -22,6 +22,8 @@ const RTL_OSVERSIONINFOW = windows.RTL_OSVERSIONINFOW; const FILE_BASIC_INFORMATION = windows.FILE_BASIC_INFORMATION; const SIZE_T = windows.SIZE_T; const CURDIR = windows.CURDIR; +const PCWSTR = windows.PCWSTR; +const RTL_QUERY_REGISTRY_TABLE = windows.RTL_QUERY_REGISTRY_TABLE; pub const THREADINFOCLASS = enum(c_int) { ThreadBasicInformation, @@ -259,3 +261,11 @@ pub extern "ntdll" fn NtOpenKey( DesiredAccess: ACCESS_MASK, ObjectAttributes: OBJECT_ATTRIBUTES, ) callconv(WINAPI) NTSTATUS; + +pub extern "ntdll" fn RtlQueryRegistryValues( + RelativeTo: ULONG, + Path: PCWSTR, + QueryTable: [*]RTL_QUERY_REGISTRY_TABLE, + Context: ?*anyopaque, + Environment: ?*anyopaque, +) callconv(WINAPI) NTSTATUS; diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 569daf0a30..258b17031b 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -43,13 +43,124 @@ pub fn detectRuntimeVersion() WindowsVersion { return @intToEnum(WindowsVersion, version); } +fn detectCpuModelArm64() !*const Target.Cpu.Model { + // Pull the CPU identifier from the registry. + // Assume max number of cores to be at 128. + const max_cpu_count = 128; + const cpu_count = getCpuCount(); + + if (cpu_count > max_cpu_count) return error.TooManyCpus; + + const table_size = max_cpu_count * 3 + 1; + const actual_table_size = cpu_count * 3 + 1; + var table: [table_size]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined; + + // Table sentinel + table[actual_table_size - 1] = .{ + .QueryRoutine = null, + .Flags = 0, + .Name = null, + .EntryContext = null, + .DefaultType = 0, + .DefaultData = null, + .DefaultLength = 0, + }; + + // Technically, a registry value can be as long as 16k u16s. 
However, MS recommends storing + // values larger than 2048 in a file rather than directly in the registry, and since we + // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. + // https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits + const max_sz_value = 2048; + const key_name = std.unicode.utf8ToUtf16LeStringLiteral("Identifier"); + + var i: usize = 0; + var index: usize = 0; + while (i < cpu_count) : (i += 1) { + var buf: [max_sz_value]u16 = undefined; + var buf_uni = std.os.windows.UNICODE_STRING{ + .Length = buf.len * 2, + .MaximumLength = buf.len * 2, + .Buffer = &buf, + }; + + var next_cpu_buf: [std.math.log2(max_cpu_count)]u8 = undefined; + const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{i}); + + var subkey: [std.math.log2(max_cpu_count) / 2]u16 = undefined; + const subkey_len = try std.unicode.utf8ToUtf16Le(&subkey, next_cpu); + subkey[subkey_len] = 0; + + table[index] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = subkey[0..subkey_len :0], + .EntryContext = null, + .DefaultType = std.os.windows.REG_NONE, + .DefaultData = null, + .DefaultLength = 0, + }; + + table[index + 1] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), + .EntryContext = &buf_uni, + .DefaultType = std.os.windows.REG_NONE, + .DefaultData = null, + .DefaultLength = 0, + }; + + table[index + 2] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_TOPKEY, + .Name = null, + .EntryContext = null, + .DefaultType = std.os.windows.REG_NONE, + .DefaultData = null, + .DefaultLength = 0, + }; + + index += 3; + } + + const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor"); + const res = 
std.os.windows.ntdll.RtlQueryRegistryValues( + std.os.windows.RTL_REGISTRY_ABSOLUTE, + topkey, + &table, + null, + null, + ); + switch (res) { + .SUCCESS => {}, + else => return error.QueryRegistryFailed, + } + + // Parse the models from strings + i = 0; + index = 0; + while (i < cpu_count) : (i += 1) { + const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[index + 1].EntryContext); + index += 3; + + var identifier_buf: [max_sz_value * 2]u8 = undefined; + const len = try std.unicode.utf16leToUtf8(&identifier_buf, entry.Buffer[0 .. entry.Length / 2]); + const identifier = identifier_buf[0..len]; + _ = identifier; + } + + return &Target.aarch64.cpu.microsoft_sq3; +} + fn detectNativeCpuAndFeaturesArm64() Target.Cpu { const Feature = Target.aarch64.Feature; + const model = detectCpuModelArm64() catch Target.Cpu.Model.generic(.aarch64); + var cpu = Target.Cpu{ .arch = .aarch64, - .model = Target.Cpu.Model.generic(.aarch64), - .features = Target.Cpu.Feature.Set.empty, + .model = model, + .features = model.features, }; if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { From 7473ef98e992519a4ff2478e610a6477f2aac86a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Nov 2022 21:15:59 +0100 Subject: [PATCH 08/15] windows: implement simplistic CPU model parser --- lib/std/zig/system/windows.zig | 87 ++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 258b17031b..5844d5d427 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -1,5 +1,6 @@ const std = @import("std"); const builtin = @import("builtin"); +const mem = std.mem; const Target = std.Target; pub const WindowsVersion = std.Target.Os.WindowsVersion; @@ -43,20 +44,68 @@ pub fn detectRuntimeVersion() WindowsVersion { return @intToEnum(WindowsVersion, version); } +const Armv8CpuInfoImpl = struct { + cores: [8]*const Target.Cpu.Model 
= undefined, + core_no: usize = 0, + + const cpu_family_models = .{ + // Family, Model, Revision + .{ 8, "D4C", 0, &Target.aarch64.cpu.microsoft_sq3 }, + }; + + fn parseOne(self: *Armv8CpuInfoImpl, identifier: []const u8) void { + if (mem.indexOf(u8, identifier, "ARMv8") == null) return; // Sanity check + + var family: ?usize = null; + var model: ?[]const u8 = null; + var revision: ?usize = null; + + var tokens = mem.tokenize(u8, identifier, " "); + while (tokens.next()) |token| { + if (mem.eql(u8, token, "Family")) { + const raw = tokens.next() orelse continue; + family = std.fmt.parseInt(usize, raw, 10) catch null; + } + if (mem.eql(u8, token, "Model")) { + model = tokens.next(); + } + if (mem.eql(u8, token, "Revision")) { + const raw = tokens.next() orelse continue; + revision = std.fmt.parseInt(usize, raw, 10) catch null; + } + } + + if (family == null or model == null or revision == null) return; + + inline for (cpu_family_models) |set| { + if (set[0] == family.? and mem.eql(u8, set[1], model.?) and set[2] == revision.?) { + self.cores[self.core_no] = set[3]; + self.core_no += 1; + break; + } + } + } + + fn finalize(self: Armv8CpuInfoImpl) ?*const Target.Cpu.Model { + if (self.core_no != 8) return null; // Implies we have seen a core we don't know much about + return self.cores[0]; + } +}; + fn detectCpuModelArm64() !*const Target.Cpu.Model { // Pull the CPU identifier from the registry. - // Assume max number of cores to be at 128. - const max_cpu_count = 128; + // Assume max number of cores to be at 8. 
+ const max_cpu_count = 8; const cpu_count = getCpuCount(); if (cpu_count > max_cpu_count) return error.TooManyCpus; - const table_size = max_cpu_count * 3 + 1; - const actual_table_size = cpu_count * 3 + 1; - var table: [table_size]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined; + const table_size = max_cpu_count * 3; + const actual_table_size = cpu_count * 3; + var table: [table_size + 1]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined; // Table sentinel - table[actual_table_size - 1] = .{ + table[actual_table_size] = .{ .QueryRoutine = null, .Flags = 0, .Name = null, @@ -86,7 +135,7 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { var next_cpu_buf: [std.math.log2(max_cpu_count)]u8 = undefined; const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{i}); - var subkey: [std.math.log2(max_cpu_count) / 2]u16 = undefined; + var subkey: [std.math.log2(max_cpu_count) + 1]u16 = undefined; const subkey_len = try std.unicode.utf8ToUtf16Le(&subkey, next_cpu); subkey[subkey_len] = 0; @@ -137,6 +186,8 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { } // Parse the models from strings + var parser = Armv8CpuInfoImpl{}; + i = 0; index = 0; while (i < cpu_count) : (i += 1) { @@ -146,10 +197,10 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { var identifier_buf: [max_sz_value * 2]u8 = undefined; const len = try std.unicode.utf16leToUtf8(&identifier_buf, entry.Buffer[0 .. 
entry.Length / 2]); const identifier = identifier_buf[0..len]; - _ = identifier; + parser.parseOne(identifier); } - return &Target.aarch64.cpu.microsoft_sq3; + return parser.finalize() orelse Target.Cpu.Model.generic(.aarch64); } fn detectNativeCpuAndFeaturesArm64() Target.Cpu { @@ -163,23 +214,41 @@ fn detectNativeCpuAndFeaturesArm64() Target.Cpu { .features = model.features, }; + // Override any features that are either present or absent if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { cpu.features.addFeature(@enumToInt(Feature.neon)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.neon)); } + if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { cpu.features.addFeature(@enumToInt(Feature.crc)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crc)); } + if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { cpu.features.addFeature(@enumToInt(Feature.crypto)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crypto)); } + if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { cpu.features.addFeature(@enumToInt(Feature.lse)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.lse)); } + if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { cpu.features.addFeature(@enumToInt(Feature.dotprod)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.dotprod)); } + if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) { cpu.features.addFeature(@enumToInt(Feature.jsconv)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.jsconv)); } return cpu; From f348fbc024f2fb6dcd9b06b58482cbcb3d79cd57 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Nov 2022 10:44:10 +0100 Subject: [PATCH 09/15] windows: revert changes to definition of HKEY --- lib/std/os/windows.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 4111a82707..4c2aba206c 
100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2876,7 +2876,7 @@ pub const REGSAM = ACCESS_MASK; pub const ACCESS_MASK = DWORD; pub const LSTATUS = LONG; -pub const HKEY = HANDLE; +pub const HKEY = *opaque {}; pub const HKEY_LOCAL_MACHINE: HKEY = @intToPtr(HKEY, 0x80000002); From 49ce86bddf49efcd1de8768d728d77ea1c8849f8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Nov 2022 13:05:33 +0100 Subject: [PATCH 10/15] windows: fix logic for pulling info for each core --- lib/std/zig/system/windows.zig | 146 +++++++++++++++------------------ 1 file changed, 68 insertions(+), 78 deletions(-) diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 5844d5d427..35005d649b 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -92,20 +92,61 @@ const Armv8CpuInfoImpl = struct { } }; -fn detectCpuModelArm64() !*const Target.Cpu.Model { - // Pull the CPU identifier from the registry. - // Assume max number of cores to be at 8. - const max_cpu_count = 8; - const cpu_count = getCpuCount(); +fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8) ![]const u8 { + // Technically, a registry value can be as long as 16k u16s. However, MS recommends storing + // values larger than 2048 in a file rather than directly in the registry, and since we + // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. 
+ // https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits + const max_sz_value = 2048; + const key_name = std.unicode.utf8ToUtf16LeStringLiteral(key); - if (cpu_count > max_cpu_count) return error.TooManyCpus; - - const table_size = max_cpu_count * 3; - const actual_table_size = cpu_count * 3; + // Originally, I wanted to issue a single call with a more complex table structure such that we + // would sequentially visit each CPU#d subkey in the registry and pull the value of interest into + // a buffer, however, NT seems to be expecting a single buffer per each table meaning we would + // end up pulling only the last CPU core info, overwriting everything else. + // If anyone can come up with a solution to this, please do! + const table_size = 2; var table: [table_size + 1]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined; + const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor"); + + var buf: [max_sz_value]u16 = undefined; + var buf_uni = std.os.windows.UNICODE_STRING{ + .Length = buf.len * 2, + .MaximumLength = buf.len * 2, + .Buffer = &buf, + }; + + const max_cpu_buf = 4; + var next_cpu_buf: [max_cpu_buf]u8 = undefined; + const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{core}); + + var subkey: [max_cpu_buf + 1]u16 = undefined; + const subkey_len = try std.unicode.utf8ToUtf16Le(&subkey, next_cpu); + subkey[subkey_len] = 0; + + table[0] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = subkey[0..subkey_len :0], + .EntryContext = null, + .DefaultType = std.os.windows.REG_NONE, + .DefaultData = null, + .DefaultLength = 0, + }; + + table[1] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), + .EntryContext = &buf_uni, + .DefaultType = 
std.os.windows.REG_NONE, + .DefaultData = null, + .DefaultLength = 0, + }; + // Table sentinel - table[actual_table_size] = .{ + table[table_size] = .{ .QueryRoutine = null, .Flags = 0, .Name = null, @@ -115,64 +156,6 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { .DefaultLength = 0, }; - // Technically, a registry value can be as long as 16k u16s. However, MS recommends storing - // values larger than 2048 in a file rather than directly in the registry, and since we - // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. - // https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits - const max_sz_value = 2048; - const key_name = std.unicode.utf8ToUtf16LeStringLiteral("Identifier"); - - var i: usize = 0; - var index: usize = 0; - while (i < cpu_count) : (i += 1) { - var buf: [max_sz_value]u16 = undefined; - var buf_uni = std.os.windows.UNICODE_STRING{ - .Length = buf.len * 2, - .MaximumLength = buf.len * 2, - .Buffer = &buf, - }; - - var next_cpu_buf: [std.math.log2(max_cpu_count)]u8 = undefined; - const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{i}); - - var subkey: [std.math.log2(max_cpu_count) + 1]u16 = undefined; - const subkey_len = try std.unicode.utf8ToUtf16Le(&subkey, next_cpu); - subkey[subkey_len] = 0; - - table[index] = .{ - .QueryRoutine = null, - .Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, - .Name = subkey[0..subkey_len :0], - .EntryContext = null, - .DefaultType = std.os.windows.REG_NONE, - .DefaultData = null, - .DefaultLength = 0, - }; - - table[index + 1] = .{ - .QueryRoutine = null, - .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, - .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), - .EntryContext = &buf_uni, - .DefaultType = std.os.windows.REG_NONE, - .DefaultData = null, - .DefaultLength = 0, - }; - - table[index + 2] = .{ - .QueryRoutine = null, - .Flags = 
std.os.windows.RTL_QUERY_REGISTRY_TOPKEY, - .Name = null, - .EntryContext = null, - .DefaultType = std.os.windows.REG_NONE, - .DefaultData = null, - .DefaultLength = 0, - }; - - index += 3; - } - - const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor"); const res = std.os.windows.ntdll.RtlQueryRegistryValues( std.os.windows.RTL_REGISTRY_ABSOLUTE, topkey, @@ -181,22 +164,29 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { null, ); switch (res) { - .SUCCESS => {}, - else => return error.QueryRegistryFailed, + .SUCCESS => { + var identifier_buf: [max_sz_value * 2]u8 = undefined; + const len = try std.unicode.utf16leToUtf8(&identifier_buf, buf_uni.Buffer[0 .. buf_uni.Length / 2]); + return identifier_buf[0..len]; + }, + else => return std.os.windows.unexpectedStatus(res), } +} + +fn detectCpuModelArm64() !*const Target.Cpu.Model { + // Pull the CPU identifier from the registry. + // Assume max number of cores to be at 8. + const max_cpu_count = 8; + const cpu_count = getCpuCount(); + + if (cpu_count > max_cpu_count) return error.TooManyCpus; // Parse the models from strings var parser = Armv8CpuInfoImpl{}; - i = 0; - index = 0; + var i: usize = 0; while (i < cpu_count) : (i += 1) { - const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[index + 1].EntryContext); - index += 3; - - var identifier_buf: [max_sz_value * 2]u8 = undefined; - const len = try std.unicode.utf16leToUtf8(&identifier_buf, entry.Buffer[0 .. 
entry.Length / 2]); - const identifier = identifier_buf[0..len]; + const identifier = try getCpuInfoFromRegistry(i, "Identifier"); parser.parseOne(identifier); } From 57bda6524b2dbe69c20f25e99c9b136dd0859297 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Nov 2022 13:51:27 +0100 Subject: [PATCH 11/15] windows: make registry helper generic over value types --- lib/std/os/windows.zig | 4 ++ lib/std/zig/system/windows.zig | 67 +++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 4c2aba206c..06b3cd7d77 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -3004,6 +3004,10 @@ pub const REG_RESOURCE_LIST = 8; /// Resource list in the hardware description pub const REG_FULL_RESOURCE_DESCRIPTOR = 9; pub const REG_RESOURCE_REQUIREMENTS_LIST = 10; +/// 64-bit number +pub const REG_QWORD = 11; +/// 64-bit number (same as REG_QWORD) +pub const REG_QWORD_LITTLE_ENDIAN = 11; pub const FILE_NOTIFY_INFORMATION = extern struct { NextEntryOffset: DWORD, diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 35005d649b..8294fd5b95 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -92,12 +92,7 @@ const Armv8CpuInfoImpl = struct { } }; -fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8) ![]const u8 { - // Technically, a registry value can be as long as 16k u16s. However, MS recommends storing - // values larger than 2048 in a file rather than directly in the registry, and since we - // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. 
- // https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits - const max_sz_value = 2048; +fn getCpuInfoFromRegistry(comptime T: type, core: usize, comptime key: []const u8) !T { const key_name = std.unicode.utf8ToUtf16LeStringLiteral(key); // Originally, I wanted to issue a single call with a more complex table structure such that we @@ -110,11 +105,38 @@ fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8) ![]const u8 { const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor"); - var buf: [max_sz_value]u16 = undefined; - var buf_uni = std.os.windows.UNICODE_STRING{ - .Length = buf.len * 2, - .MaximumLength = buf.len * 2, - .Buffer = &buf, + // Technically, a registry value can be as long as 16k u16s. However, MS recommends storing + // values larger than 2048 in a file rather than directly in the registry, and since we + // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. 
+ // https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits + const max_sz_value = 2048; + + const ctx: *anyopaque = blk: { + switch (@typeInfo(T)) { + .Int => |int| { + const bits = int.bits; + var buf: [bits * 8]u8 = undefined; + break :blk &buf; + }, + .Pointer => |ptr| switch (ptr.size) { + .Slice => { + const child = @typeInfo(ptr.child); + if (child != .Int and child.Int.bits != 8) { + @compileError("Unsupported type " ++ @typeName(T) ++ " as registry value"); + } + + var buf: [max_sz_value]u16 = undefined; + var unicode = std.os.windows.UNICODE_STRING{ + .Length = buf.len * 2, + .MaximumLength = buf.len * 2, + .Buffer = &buf, + }; + break :blk &unicode; + }, + else => @compileError("Unsupported type " ++ @typeName(T) ++ " as registry value"), + }, + else => @compileError("Unsupported type " ++ @typeName(T) ++ " as registry value"), + } }; const max_cpu_buf = 4; @@ -139,7 +161,7 @@ fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8) ![]const u8 { .QueryRoutine = null, .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), - .EntryContext = &buf_uni, + .EntryContext = ctx, .DefaultType = std.os.windows.REG_NONE, .DefaultData = null, .DefaultLength = 0, @@ -164,10 +186,18 @@ fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8) ![]const u8 { null, ); switch (res) { - .SUCCESS => { - var identifier_buf: [max_sz_value * 2]u8 = undefined; - const len = try std.unicode.utf16leToUtf8(&identifier_buf, buf_uni.Buffer[0 .. 
buf_uni.Length / 2]); - return identifier_buf[0..len]; + .SUCCESS => switch (@typeInfo(T)) { + .Int => { + const entry = @ptrCast(*align(1) const T, table[1].EntryContext); + return entry.*; + }, + .Pointer => { + const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[1].EntryContext); + var identifier_buf: [max_sz_value * 2]u8 = undefined; + const len = try std.unicode.utf16leToUtf8(&identifier_buf, entry.Buffer[0 .. entry.Length / 2]); + return @as(T, identifier_buf[0..len]); + }, + else => unreachable, }, else => return std.os.windows.unexpectedStatus(res), } @@ -186,8 +216,11 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { var i: usize = 0; while (i < cpu_count) : (i += 1) { - const identifier = try getCpuInfoFromRegistry(i, "Identifier"); + const identifier = try getCpuInfoFromRegistry([]const u8, i, "Identifier"); parser.parseOne(identifier); + + const hex = try getCpuInfoFromRegistry(u64, i, "CP 4000"); + std.log.warn("{d} => {x}", .{ i, hex }); } return parser.finalize() orelse Target.Cpu.Model.generic(.aarch64); From d64d7aaac7650d79a6f5a5c5d0db6730b6a11b2c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Nov 2022 14:13:28 +0100 Subject: [PATCH 12/15] windows: drive the registry helper with actual value set for reg entries --- lib/std/os/windows.zig | 56 ++++++++++---------- lib/std/zig/system/windows.zig | 94 +++++++++++++++++++++------------- 2 files changed, 86 insertions(+), 64 deletions(-) diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 06b3cd7d77..d4b796ae77 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2981,33 +2981,35 @@ pub const RTL_QUERY_REGISTRY_DELETE = 0x00000040; /// If the types do not match, the call fails. 
pub const RTL_QUERY_REGISTRY_TYPECHECK = 0x00000100; -/// No value type -pub const REG_NONE = 0; -/// Unicode nul terminated string -pub const REG_SZ = 1; -/// Unicode nul terminated string (with environment variable references) -pub const REG_EXPAND_SZ = 2; -/// Free form binary -pub const REG_BINARY = 3; -/// 32-bit number -pub const REG_DWORD = 4; -/// 32-bit number (same as REG_DWORD) -pub const REG_DWORD_LITTLE_ENDIAN = 4; -/// 32-bit number -pub const REG_DWORD_BIG_ENDIAN = 5; -/// Symbolic Link (unicode) -pub const REG_LINK = 6; -/// Multiple Unicode strings -pub const REG_MULTI_SZ = 7; -/// Resource list in the resource map -pub const REG_RESOURCE_LIST = 8; -/// Resource list in the hardware description -pub const REG_FULL_RESOURCE_DESCRIPTOR = 9; -pub const REG_RESOURCE_REQUIREMENTS_LIST = 10; -/// 64-bit number -pub const REG_QWORD = 11; -/// 64-bit number (same as REG_QWORD) -pub const REG_QWORD_LITTLE_ENDIAN = 11; +pub const REG = struct { + /// No value type + pub const NONE: ULONG = 0; + /// Unicode nul terminated string + pub const SZ: ULONG = 1; + /// Unicode nul terminated string (with environment variable references) + pub const EXPAND_SZ: ULONG = 2; + /// Free form binary + pub const BINARY: ULONG = 3; + /// 32-bit number + pub const DWORD: ULONG = 4; + /// 32-bit number (same as REG_DWORD) + pub const DWORD_LITTLE_ENDIAN: ULONG = 4; + /// 32-bit number + pub const DWORD_BIG_ENDIAN: ULONG = 5; + /// Symbolic Link (unicode) + pub const LINK: ULONG = 6; + /// Multiple Unicode strings + pub const MULTI_SZ: ULONG = 7; + /// Resource list in the resource map + pub const RESOURCE_LIST: ULONG = 8; + /// Resource list in the hardware description + pub const FULL_RESOURCE_DESCRIPTOR: ULONG = 9; + pub const RESOURCE_REQUIREMENTS_LIST: ULONG = 10; + /// 64-bit number + pub const QWORD: ULONG = 11; + /// 64-bit number (same as REG_QWORD) + pub const QWORD_LITTLE_ENDIAN: ULONG = 11; +}; pub const FILE_NOTIFY_INFORMATION = extern struct { NextEntryOffset: 
DWORD, diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 8294fd5b95..e1ab002834 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -5,6 +5,7 @@ const Target = std.Target; pub const WindowsVersion = std.Target.Os.WindowsVersion; pub const PF = std.os.windows.PF; +pub const REG = std.os.windows.REG; pub const IsProcessorFeaturePresent = std.os.windows.IsProcessorFeaturePresent; /// Returns the highest known WindowsVersion deduced from reported runtime information. @@ -92,7 +93,7 @@ const Armv8CpuInfoImpl = struct { } }; -fn getCpuInfoFromRegistry(comptime T: type, core: usize, comptime key: []const u8) !T { +fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8, value_type: std.os.windows.ULONG) ![]const u8 { const key_name = std.unicode.utf8ToUtf16LeStringLiteral(key); // Originally, I wanted to issue a single call with a more complex table structure such that we @@ -105,37 +106,42 @@ fn getCpuInfoFromRegistry(comptime T: type, core: usize, comptime key: []const u const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor"); - // Technically, a registry value can be as long as 16k u16s. However, MS recommends storing - // values larger than 2048 in a file rather than directly in the registry, and since we + // Technically, a registry value can be as long as 1MB. However, MS recommends storing + // values larger than 2048 bytes in a file rather than directly in the registry, and since we // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. 
// https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits - const max_sz_value = 2048; + const max_value_len = 2048; const ctx: *anyopaque = blk: { - switch (@typeInfo(T)) { - .Int => |int| { - const bits = int.bits; - var buf: [bits * 8]u8 = undefined; + switch (value_type) { + REG.NONE => unreachable, + + REG.SZ, + REG.EXPAND_SZ, + REG.MULTI_SZ, + => { + var buf: [max_value_len / 2]u16 = undefined; + var unicode = std.os.windows.UNICODE_STRING{ + .Length = max_value_len, + .MaximumLength = max_value_len, + .Buffer = &buf, + }; + break :blk &unicode; + }, + + REG.DWORD, + REG.DWORD_BIG_ENDIAN, + => { + var buf: [4]u8 = undefined; break :blk &buf; }, - .Pointer => |ptr| switch (ptr.size) { - .Slice => { - const child = @typeInfo(ptr.child); - if (child != .Int and child.Int.bits != 8) { - @compileError("Unsupported type " ++ @typeName(T) ++ " as registry value"); - } - var buf: [max_sz_value]u16 = undefined; - var unicode = std.os.windows.UNICODE_STRING{ - .Length = buf.len * 2, - .MaximumLength = buf.len * 2, - .Buffer = &buf, - }; - break :blk &unicode; - }, - else => @compileError("Unsupported type " ++ @typeName(T) ++ " as registry value"), + REG.QWORD => { + var buf: [8]u8 = undefined; + break :blk &buf; }, - else => @compileError("Unsupported type " ++ @typeName(T) ++ " as registry value"), + + else => unreachable, } }; @@ -152,7 +158,7 @@ fn getCpuInfoFromRegistry(comptime T: type, core: usize, comptime key: []const u .Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, .Name = subkey[0..subkey_len :0], .EntryContext = null, - .DefaultType = std.os.windows.REG_NONE, + .DefaultType = REG.NONE, .DefaultData = null, .DefaultLength = 0, }; @@ -162,7 +168,7 @@ fn getCpuInfoFromRegistry(comptime T: type, core: usize, comptime key: []const u .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), 
.EntryContext = ctx, - .DefaultType = std.os.windows.REG_NONE, + .DefaultType = REG.NONE, .DefaultData = null, .DefaultLength = 0, }; @@ -186,17 +192,31 @@ fn getCpuInfoFromRegistry(comptime T: type, core: usize, comptime key: []const u null, ); switch (res) { - .SUCCESS => switch (@typeInfo(T)) { - .Int => { - const entry = @ptrCast(*align(1) const T, table[1].EntryContext); - return entry.*; - }, - .Pointer => { + .SUCCESS => switch (value_type) { + REG.NONE => unreachable, + + REG.SZ, + REG.EXPAND_SZ, + REG.MULTI_SZ, + => { const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[1].EntryContext); - var identifier_buf: [max_sz_value * 2]u8 = undefined; + var identifier_buf: [max_value_len]u8 = undefined; const len = try std.unicode.utf16leToUtf8(&identifier_buf, entry.Buffer[0 .. entry.Length / 2]); - return @as(T, identifier_buf[0..len]); + return identifier_buf[0..len]; }, + + REG.DWORD, + REG.DWORD_BIG_ENDIAN, + REG.QWORD, + => { + const entry = @ptrCast([*]align(1) const u8, table[1].EntryContext); + switch (value_type) { + REG.DWORD, REG.DWORD_BIG_ENDIAN => return entry[0..4], + REG.QWORD => return entry[0..8], + else => unreachable, + } + }, + else => unreachable, }, else => return std.os.windows.unexpectedStatus(res), @@ -216,11 +236,11 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { var i: usize = 0; while (i < cpu_count) : (i += 1) { - const identifier = try getCpuInfoFromRegistry([]const u8, i, "Identifier"); + const identifier = try getCpuInfoFromRegistry(i, "Identifier", REG.SZ); parser.parseOne(identifier); - const hex = try getCpuInfoFromRegistry(u64, i, "CP 4000"); - std.log.warn("{d} => {x}", .{ i, hex }); + const hex = try getCpuInfoFromRegistry(i, "CP 4000", REG.QWORD); + std.log.warn("{d} => {x}", .{ i, std.fmt.fmtSliceHexLower(hex) }); } return parser.finalize() orelse Target.Cpu.Model.generic(.aarch64); From 7fbd2955fae13cdc184dbd648743fb187b3ce33d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Nov 2022 
10:41:39 +0100 Subject: [PATCH 13/15] windows: pull QWORD and SZ identifiers from registry in one syscall At the same time, do not assume the values necessarily exist, and use defaults as markers for the lack of keys in the registry. --- lib/std/zig/system/windows.zig | 205 ++++++++++++++++++++------------- 1 file changed, 127 insertions(+), 78 deletions(-) diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index e1ab002834..0aa1abd941 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -93,58 +93,33 @@ const Armv8CpuInfoImpl = struct { } }; -fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8, value_type: std.os.windows.ULONG) ![]const u8 { - const key_name = std.unicode.utf8ToUtf16LeStringLiteral(key); +// Technically, a registry value can be as long as 1MB. However, MS recommends storing +// values larger than 2048 bytes in a file rather than directly in the registry, and since we +// are only accessing a system hive \Registry\Machine, we stick to MS guidelines. +// https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits +const max_value_len = 2048; +const RegistryPair = struct { + key: []const u8, + value: std.os.windows.ULONG, +}; + +fn getCpuInfoFromRegistry( + core: usize, + comptime pairs_num: comptime_int, + comptime pairs: [pairs_num]RegistryPair, + out_buf: *[pairs_num][max_value_len]u8, +) !void { // Originally, I wanted to issue a single call with a more complex table structure such that we // would sequentially visit each CPU#d subkey in the registry and pull the value of interest into // a buffer, however, NT seems to be expecting a single buffer per each table meaning we would // end up pulling only the last CPU core info, overwriting everything else. // If anyone can come up with a solution to this, please do! 
- const table_size = 2; + const table_size = 1 + pairs.len; var table: [table_size + 1]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined; const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor"); - // Technically, a registry value can be as long as 1MB. However, MS recommends storing - // values larger than 2048 bytes in a file rather than directly in the registry, and since we - // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. - // https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits - const max_value_len = 2048; - - const ctx: *anyopaque = blk: { - switch (value_type) { - REG.NONE => unreachable, - - REG.SZ, - REG.EXPAND_SZ, - REG.MULTI_SZ, - => { - var buf: [max_value_len / 2]u16 = undefined; - var unicode = std.os.windows.UNICODE_STRING{ - .Length = max_value_len, - .MaximumLength = max_value_len, - .Buffer = &buf, - }; - break :blk &unicode; - }, - - REG.DWORD, - REG.DWORD_BIG_ENDIAN, - => { - var buf: [4]u8 = undefined; - break :blk &buf; - }, - - REG.QWORD => { - var buf: [8]u8 = undefined; - break :blk &buf; - }, - - else => unreachable, - } - }; - const max_cpu_buf = 4; var next_cpu_buf: [max_cpu_buf]u8 = undefined; const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{core}); @@ -163,15 +138,77 @@ fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8, value_type: std .DefaultLength = 0, }; - table[1] = .{ - .QueryRoutine = null, - .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, - .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), - .EntryContext = ctx, - .DefaultType = REG.NONE, - .DefaultData = null, - .DefaultLength = 0, - }; + inline for (pairs) |pair, i| { + const ctx: *anyopaque = blk: { + switch (pair.value) { + REG.SZ, + REG.EXPAND_SZ, + REG.MULTI_SZ, + => { + var buf: [max_value_len / 2]u16 = undefined; + var unicode = std.os.windows.UNICODE_STRING{ 
+ .Length = 0, + .MaximumLength = max_value_len, + .Buffer = &buf, + }; + break :blk &unicode; + }, + + REG.DWORD, + REG.DWORD_BIG_ENDIAN, + => { + var buf: [4]u8 = undefined; + break :blk &buf; + }, + + REG.QWORD => { + var buf: [8]u8 = undefined; + break :blk &buf; + }, + + else => unreachable, + } + }; + const default: struct { ptr: *anyopaque, len: u32 } = blk: { + switch (pair.value) { + REG.SZ, + REG.EXPAND_SZ, + REG.MULTI_SZ, + => { + const def = std.unicode.utf8ToUtf16LeStringLiteral("Unknown"); + var buf: [def.len + 1]u16 = undefined; + mem.copy(u16, &buf, def); + buf[def.len] = 0; + break :blk .{ .ptr = &buf, .len = @intCast(u32, (buf.len + 1) * 2) }; + }, + + REG.DWORD, + REG.DWORD_BIG_ENDIAN, + => { + var buf: [4]u8 = [_]u8{0} ** 4; + break :blk .{ .ptr = &buf, .len = 4 }; + }, + + REG.QWORD => { + var buf: [8]u8 = [_]u8{0} ** 8; + break :blk .{ .ptr = &buf, .len = 8 }; + }, + + else => unreachable, + } + }; + const key_name = std.unicode.utf8ToUtf16LeStringLiteral(pair.key); + + table[i + 1] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT, + .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), + .EntryContext = ctx, + .DefaultType = pair.value, + .DefaultData = default.ptr, + .DefaultLength = default.len, + }; + } // Table sentinel table[table_size] = .{ @@ -192,32 +229,37 @@ fn getCpuInfoFromRegistry(core: usize, comptime key: []const u8, value_type: std null, ); switch (res) { - .SUCCESS => switch (value_type) { - REG.NONE => unreachable, + .SUCCESS => { + inline for (pairs) |pair, i| switch (pair.value) { + REG.NONE => unreachable, - REG.SZ, - REG.EXPAND_SZ, - REG.MULTI_SZ, - => { - const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[1].EntryContext); - var identifier_buf: [max_value_len]u8 = undefined; - const len = try std.unicode.utf16leToUtf8(&identifier_buf, entry.Buffer[0 .. 
entry.Length / 2]); - return identifier_buf[0..len]; - }, + REG.SZ, + REG.EXPAND_SZ, + REG.MULTI_SZ, + => { + const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[i + 1].EntryContext); + const len = try std.unicode.utf16leToUtf8(out_buf[i][0..], entry.Buffer[0 .. entry.Length / 2]); + out_buf[i][len] = 0; + }, - REG.DWORD, - REG.DWORD_BIG_ENDIAN, - REG.QWORD, - => { - const entry = @ptrCast([*]align(1) const u8, table[1].EntryContext); - switch (value_type) { - REG.DWORD, REG.DWORD_BIG_ENDIAN => return entry[0..4], - REG.QWORD => return entry[0..8], - else => unreachable, - } - }, + REG.DWORD, + REG.DWORD_BIG_ENDIAN, + REG.QWORD, + => { + const entry = @ptrCast([*]align(1) const u8, table[i + 1].EntryContext); + switch (pair.value) { + REG.DWORD, REG.DWORD_BIG_ENDIAN => { + mem.copy(u8, out_buf[i][0..4], entry[0..4]); + }, + REG.QWORD => { + mem.copy(u8, out_buf[i][0..8], entry[0..8]); + }, + else => unreachable, + } + }, - else => unreachable, + else => unreachable, + }; }, else => return std.os.windows.unexpectedStatus(res), } @@ -234,13 +276,20 @@ fn detectCpuModelArm64() !*const Target.Cpu.Model { // Parse the models from strings var parser = Armv8CpuInfoImpl{}; + var out_buf: [3][max_value_len]u8 = undefined; + var i: usize = 0; while (i < cpu_count) : (i += 1) { - const identifier = try getCpuInfoFromRegistry(i, "Identifier", REG.SZ); - parser.parseOne(identifier); + try getCpuInfoFromRegistry(i, 3, .{ + .{ .key = "CP 4000", .value = REG.QWORD }, + .{ .key = "Identifier", .value = REG.SZ }, + .{ .key = "VendorIdentifier", .value = REG.SZ }, + }, &out_buf); - const hex = try getCpuInfoFromRegistry(i, "CP 4000", REG.QWORD); - std.log.warn("{d} => {x}", .{ i, std.fmt.fmtSliceHexLower(hex) }); + const hex = out_buf[0][0..8]; + const identifier = mem.sliceTo(out_buf[1][0..], 0); + const vendor_identifier = mem.sliceTo(out_buf[2][0..], 0); + std.log.warn("{d} => {x}, {s}, {s}", .{ i, std.fmt.fmtSliceHexLower(hex), identifier, 
vendor_identifier }); } return parser.finalize() orelse Target.Cpu.Model.generic(.aarch64); From 7bf12b1197823a5b0554dc3f7f67074df5fcafb1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Nov 2022 17:03:14 +0100 Subject: [PATCH 14/15] arm: move cpu model table into system/arm.zig Now we can reuse the table between CPU model parsers on Linux and Windows. Use similar parsing structure for Windows as we do for Linux. On Windows, we rely on two entries in the registry per CPU core: `CP 4000` and `Identifier`. Collating the data from the two allows us to recreate most of the `/proc/cpuinfo` data natively on Windows. Additionally, we still allow for overwriting any CPU features as flagged by pulling the feature data embedded in `SharedUserData`. --- lib/std/target/aarch64.zig | 15 -- lib/std/zig/system/arm.zig | 134 +++++++++++++ lib/std/zig/system/linux.zig | 131 +------------ lib/std/zig/system/windows.zig | 339 ++++++++++++++++----------------- 4 files changed, 310 insertions(+), 309 deletions(-) create mode 100644 lib/std/zig/system/arm.zig diff --git a/lib/std/target/aarch64.zig b/lib/std/target/aarch64.zig index 2fd0d337e3..af50c9d890 100644 --- a/lib/std/target/aarch64.zig +++ b/lib/std/target/aarch64.zig @@ -2252,19 +2252,4 @@ pub const cpu = struct { .v8a, }), }; - - pub const microsoft_sq3 = CpuModel{ - .name = "microsoft_sq3", - .llvm_name = "generic", - .features = featureSet(&[_]Feature{ - .aes, - .crc, - .crypto, - .dotprod, - .fp_armv8, - .lse, - .neon, - .sha2, - }), - }; }; diff --git a/lib/std/zig/system/arm.zig b/lib/std/zig/system/arm.zig new file mode 100644 index 0000000000..b6f06206bc --- /dev/null +++ b/lib/std/zig/system/arm.zig @@ -0,0 +1,134 @@ +const std = @import("std"); + +pub const CoreInfo = struct { + architecture: u8 = 0, + implementer: u8 = 0, + variant: u8 = 0, + part: u16 = 0, +}; + +pub const cpu_models = struct { + // Shorthands to simplify the tables below.
+ const A32 = std.Target.arm.cpu; + const A64 = std.Target.aarch64.cpu; + + const E = struct { + part: u16, + variant: ?u8 = null, // null if matches any variant + m32: ?*const std.Target.Cpu.Model = null, + m64: ?*const std.Target.Cpu.Model = null, + }; + + // implementer = 0x41 + const ARM = [_]E{ + E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, + E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, + E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, + E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, + E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, + E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, + E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, + E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, + E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, + E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, + E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, + E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, + E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, + E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, + E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, + E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, + E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, + E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, + E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, + E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, + E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, + E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, + E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, + E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, + E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, + E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, + 
E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, + E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, + E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, + E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, + E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, + E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, + E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, + E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, + E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, + E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, + // This is a guess based on https://www.notebookcheck.net/Qualcomm-Snapdragon-8cx-Gen-3-Processor-Benchmarks-and-Specs.652916.0.html + E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c }, + E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, + E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, + E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, + E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, + }; + // implementer = 0x42 + const Broadcom = [_]E{ + E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x43 + const Cavium = [_]E{ + E{ .part = 0x0a0, .m64 = &A64.thunderx }, + E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, + E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, + E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, + E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x46 + const Fujitsu = [_]E{ + E{ .part = 0x001, .m64 = &A64.a64fx }, + }; + // implementer = 0x48 + const HiSilicon = [_]E{ + E{ .part = 0xd01, .m64 = &A64.tsv110 }, + }; + // implementer = 0x4e + const Nvidia = [_]E{ + E{ .part = 0x004, .m64 = &A64.carmel }, + }; + // implementer = 0x50 + const Ampere = [_]E{ + E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, + E{ .part = 0x000, .m64 = &A64.xgene1 }, + }; + // implementer = 0x51 + const Qualcomm = [_]E{ + E{ .part = 0x06f, .m32 = &A32.krait }, + 
E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0xc00, .m64 = &A64.falkor }, + E{ .part = 0xc01, .m64 = &A64.saphira }, + }; + + pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const std.Target.Cpu.Model { + const models = switch (core.implementer) { + 0x41 => &ARM, + 0x42 => &Broadcom, + 0x43 => &Cavium, + 0x46 => &Fujitsu, + 0x48 => &HiSilicon, + 0x50 => &Ampere, + 0x51 => &Qualcomm, + else => return null, + }; + + for (models) |model| { + if (model.part == core.part and + (model.variant == null or model.variant.? == core.variant)) + return if (is_64bit) model.m64 else model.m32; + } + + return null; + } +}; diff --git a/lib/std/zig/system/linux.zig b/lib/std/zig/system/linux.zig index e92aacb6ef..63a49c6472 100644 --- a/lib/std/zig/system/linux.zig +++ b/lib/std/zig/system/linux.zig @@ -159,129 +159,7 @@ const ArmCpuinfoImpl = struct { is_really_v6: bool = false, }; - const cpu_models = struct { - // Shorthands to simplify the tables below. 
- const A32 = Target.arm.cpu; - const A64 = Target.aarch64.cpu; - - const E = struct { - part: u16, - variant: ?u8 = null, // null if matches any variant - m32: ?*const Target.Cpu.Model = null, - m64: ?*const Target.Cpu.Model = null, - }; - - // implementer = 0x41 - const ARM = [_]E{ - E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, - E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, - E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, - E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, - E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, - E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, - E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, - E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, - E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, - E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, - E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, - E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, - E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, - E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, - E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, - E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, - E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, - E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, - E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, - E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, - E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, - E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, - E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, - E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, - E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, - E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, - E{ .part = 0xd09, 
.m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, - E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, - E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, - E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, - E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, - E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, - E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, - E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, - E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, - E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, - E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, - E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, - E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, - E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, - }; - // implementer = 0x42 - const Broadcom = [_]E{ - E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x43 - const Cavium = [_]E{ - E{ .part = 0x0a0, .m64 = &A64.thunderx }, - E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, - E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, - E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, - E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x46 - const Fujitsu = [_]E{ - E{ .part = 0x001, .m64 = &A64.a64fx }, - }; - // implementer = 0x48 - const HiSilicon = [_]E{ - E{ .part = 0xd01, .m64 = &A64.tsv110 }, - }; - // implementer = 0x4e - const Nvidia = [_]E{ - E{ .part = 0x004, .m64 = &A64.carmel }, - }; - // implementer = 0x50 - const Ampere = [_]E{ - E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, - E{ .part = 0x000, .m64 = &A64.xgene1 }, - }; - // implementer = 0x51 - const Qualcomm = [_]E{ - E{ .part = 0x06f, .m32 = &A32.krait }, - E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = 
&A64.cortex_a73 }, - E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, - E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0xc00, .m64 = &A64.falkor }, - E{ .part = 0xc01, .m64 = &A64.saphira }, - }; - - fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model { - const models = switch (core.implementer) { - 0x41 => &ARM, - 0x42 => &Broadcom, - 0x43 => &Cavium, - 0x46 => &Fujitsu, - 0x48 => &HiSilicon, - 0x50 => &Ampere, - 0x51 => &Qualcomm, - else => return null, - }; - - for (models) |model| { - if (model.part == core.part and - (model.variant == null or model.variant.? == core.variant)) - return if (is_64bit) model.m64 else model.m32; - } - - return null; - } - }; + const cpu_models = @import("arm.zig").cpu_models; fn addOne(self: *ArmCpuinfoImpl) void { if (self.have_fields == 4 and self.core_no < self.cores.len) { @@ -346,7 +224,12 @@ const ArmCpuinfoImpl = struct { var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined; for (self.cores[0..self.core_no]) |core, i| { - known_models[i] = cpu_models.isKnown(core, is_64bit); + known_models[i] = cpu_models.isKnown(.{ + .architecture = core.architecture, + .implementer = core.implementer, + .variant = core.variant, + .part = core.part, + }, is_64bit); } // XXX We pick the first core on big.LITTLE systems, hopefully the diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 0aa1abd941..f11905873d 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -45,54 +45,6 @@ pub fn detectRuntimeVersion() WindowsVersion { return @intToEnum(WindowsVersion, version); } -const Armv8CpuInfoImpl = struct { - cores: [8]*const Target.Cpu.Model = undefined, - core_no: usize = 0, - - const 
cpu_family_models = .{ - // Family, Model, Revision - .{ 8, "D4C", 0, &Target.aarch64.cpu.microsoft_sq3 }, - }; - - fn parseOne(self: *Armv8CpuInfoImpl, identifier: []const u8) void { - if (mem.indexOf(u8, identifier, "ARMv8") == null) return; // Sanity check - - var family: ?usize = null; - var model: ?[]const u8 = null; - var revision: ?usize = null; - - var tokens = mem.tokenize(u8, identifier, " "); - while (tokens.next()) |token| { - if (mem.eql(u8, token, "Family")) { - const raw = tokens.next() orelse continue; - family = std.fmt.parseInt(usize, raw, 10) catch null; - } - if (mem.eql(u8, token, "Model")) { - model = tokens.next(); - } - if (mem.eql(u8, token, "Revision")) { - const raw = tokens.next() orelse continue; - revision = std.fmt.parseInt(usize, raw, 10) catch null; - } - } - - if (family == null or model == null or revision == null) return; - - inline for (cpu_family_models) |set| { - if (set[0] == family.? and mem.eql(u8, set[1], model.?) and set[2] == revision.?) { - self.cores[self.core_no] = set[3]; - self.core_no += 1; - break; - } - } - } - - fn finalize(self: Armv8CpuInfoImpl) ?*const Target.Cpu.Model { - if (self.core_no != 8) return null; // Implies we have seen a core we don't know much about - return self.cores[0]; - } -}; - // Technically, a registry value can be as long as 1MB. However, MS recommends storing // values larger than 2048 bytes in a file rather than directly in the registry, and since we // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. 
@@ -169,44 +121,16 @@ fn getCpuInfoFromRegistry( else => unreachable, } }; - const default: struct { ptr: *anyopaque, len: u32 } = blk: { - switch (pair.value) { - REG.SZ, - REG.EXPAND_SZ, - REG.MULTI_SZ, - => { - const def = std.unicode.utf8ToUtf16LeStringLiteral("Unknown"); - var buf: [def.len + 1]u16 = undefined; - mem.copy(u16, &buf, def); - buf[def.len] = 0; - break :blk .{ .ptr = &buf, .len = @intCast(u32, (buf.len + 1) * 2) }; - }, - - REG.DWORD, - REG.DWORD_BIG_ENDIAN, - => { - var buf: [4]u8 = [_]u8{0} ** 4; - break :blk .{ .ptr = &buf, .len = 4 }; - }, - - REG.QWORD => { - var buf: [8]u8 = [_]u8{0} ** 8; - break :blk .{ .ptr = &buf, .len = 8 }; - }, - - else => unreachable, - } - }; - const key_name = std.unicode.utf8ToUtf16LeStringLiteral(pair.key); + const key_namee = std.unicode.utf8ToUtf16LeStringLiteral(pair.key); table[i + 1] = .{ .QueryRoutine = null, - .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT, - .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), + .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = @intToPtr([*:0]u16, @ptrToInt(key_namee)), .EntryContext = ctx, - .DefaultType = pair.value, - .DefaultData = default.ptr, - .DefaultLength = default.len, + .DefaultType = REG.NONE, + .DefaultData = null, + .DefaultLength = 0, }; } @@ -261,102 +185,177 @@ fn getCpuInfoFromRegistry( else => unreachable, }; }, - else => return std.os.windows.unexpectedStatus(res), + else => return error.Unexpected, } } -fn detectCpuModelArm64() !*const Target.Cpu.Model { - // Pull the CPU identifier from the registry. - // Assume max number of cores to be at 8. 
- const max_cpu_count = 8; - const cpu_count = getCpuCount(); - - if (cpu_count > max_cpu_count) return error.TooManyCpus; - - // Parse the models from strings - var parser = Armv8CpuInfoImpl{}; - - var out_buf: [3][max_value_len]u8 = undefined; - - var i: usize = 0; - while (i < cpu_count) : (i += 1) { - try getCpuInfoFromRegistry(i, 3, .{ - .{ .key = "CP 4000", .value = REG.QWORD }, - .{ .key = "Identifier", .value = REG.SZ }, - .{ .key = "VendorIdentifier", .value = REG.SZ }, - }, &out_buf); - - const hex = out_buf[0][0..8]; - const identifier = mem.sliceTo(out_buf[1][0..], 0); - const vendor_identifier = mem.sliceTo(out_buf[2][0..], 0); - std.log.warn("{d} => {x}, {s}, {s}", .{ i, std.fmt.fmtSliceHexLower(hex), identifier, vendor_identifier }); - } - - return parser.finalize() orelse Target.Cpu.Model.generic(.aarch64); -} - -fn detectNativeCpuAndFeaturesArm64() Target.Cpu { - const Feature = Target.aarch64.Feature; - - const model = detectCpuModelArm64() catch Target.Cpu.Model.generic(.aarch64); - - var cpu = Target.Cpu{ - .arch = .aarch64, - .model = model, - .features = model.features, - }; - - // Override any features that are either present or absent - if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.neon)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.neon)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.crc)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.crc)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.crypto)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.crypto)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.lse)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.lse)); - } - - if 
(IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.dotprod)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.dotprod)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.jsconv)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.jsconv)); - } - - return cpu; -} - fn getCpuCount() usize { return std.os.windows.peb().NumberOfProcessors; } -pub fn detectNativeCpuAndFeatures() ?Target.Cpu { - switch (builtin.cpu.arch) { - .aarch64 => return detectNativeCpuAndFeaturesArm64(), - else => |arch| return .{ +const ArmCpuInfoImpl = struct { + cores: [4]CoreInfo = undefined, + core_no: usize = 0, + have_fields: usize = 0, + + const CoreInfo = @import("arm.zig").CoreInfo; + const cpu_models = @import("arm.zig").cpu_models; + + const Data = struct { + cp_4000: []const u8, + identifier: []const u8, + }; + + fn parseDataHook(self: *ArmCpuInfoImpl, data: Data) !void { + const info = &self.cores[self.core_no]; + info.* = .{}; + + // CPU part + info.part = mem.readIntLittle(u16, data.cp_4000[0..2]) >> 4; + self.have_fields += 1; + + // CPU implementer + info.implementer = data.cp_4000[3]; + self.have_fields += 1; + + var tokens = mem.tokenize(u8, data.identifier, " "); + while (tokens.next()) |token| { + if (mem.eql(u8, "Family", token)) { + // CPU architecture + const family = tokens.next() orelse continue; + info.architecture = try std.fmt.parseInt(u8, family, 10); + self.have_fields += 1; + break; + } + } else return; + + self.addOne(); + } + + fn addOne(self: *ArmCpuInfoImpl) void { + if (self.have_fields == 3 and self.core_no < self.cores.len) { + if (self.core_no > 0) { + // Deduplicate the core info. 
+ for (self.cores[0..self.core_no]) |it| { + if (std.meta.eql(it, self.cores[self.core_no])) + return; + } + } + self.core_no += 1; + } + } + + fn finalize(self: ArmCpuInfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu { + if (self.core_no == 0) return null; + + const is_64bit = switch (arch) { + .aarch64, .aarch64_be, .aarch64_32 => true, + else => false, + }; + + var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined; + for (self.cores[0..self.core_no]) |core, i| { + known_models[i] = cpu_models.isKnown(core, is_64bit); + } + + // XXX We pick the first core on big.LITTLE systems, hopefully the + // LITTLE one. + const model = known_models[0] orelse return null; + return Target.Cpu{ .arch = arch, - .model = Target.Cpu.Model.generic(arch), - .features = Target.Cpu.Feature.Set.empty, + .model = model, + .features = model.features, + }; + } +}; + +const ArmCpuInfoParser = CpuInfoParser(ArmCpuInfoImpl); + +fn CpuInfoParser(comptime impl: anytype) type { + return struct { + fn parse(arch: Target.Cpu.Arch) !?Target.Cpu { + var obj: impl = .{}; + var out_buf: [2][max_value_len]u8 = undefined; + + var i: usize = 0; + while (i < getCpuCount()) : (i += 1) { + try getCpuInfoFromRegistry(i, 2, .{ + .{ .key = "CP 4000", .value = REG.QWORD }, + .{ .key = "Identifier", .value = REG.SZ }, + }, &out_buf); + + const cp_4000 = out_buf[0][0..8]; + const identifier = mem.sliceTo(out_buf[1][0..], 0); + + try obj.parseDataHook(.{ + .cp_4000 = cp_4000, + .identifier = identifier, + }); + } + + return obj.finalize(arch); + } + }; +} + +fn genericCpu(comptime arch: Target.Cpu.Arch) Target.Cpu { + return .{ + .arch = arch, + .model = Target.Cpu.Model.generic(arch), + .features = Target.Cpu.Feature.Set.empty, + }; +} + +pub fn detectNativeCpuAndFeatures() ?Target.Cpu { + const current_arch = builtin.cpu.arch; + switch (current_arch) { + .aarch64, .aarch64_be, .aarch64_32 => { + var cpu = cpu: { + var maybe_cpu = ArmCpuInfoParser.parse(current_arch) catch break :cpu 
genericCpu(current_arch); + break :cpu maybe_cpu orelse genericCpu(current_arch); + }; + + const Feature = Target.aarch64.Feature; + + // Override any features that are either present or absent + if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.neon)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.neon)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crc)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crc)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crypto)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crypto)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.lse)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.lse)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.dotprod)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.dotprod)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.jsconv)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.jsconv)); + } + + return cpu; }, + else => {}, } } From 25281891013619aa767b02418d08d84d111fd7f1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Nov 2022 17:09:03 +0100 Subject: [PATCH 15/15] windows: fix signature of kernel32.RegOpenKeyExW to use *HKEY --- lib/std/os/windows/kernel32.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index eeda2f63b6..e0c7b96f84 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -421,5 +421,5 @@ pub extern "kernel32" fn RegOpenKeyExW( 
lpSubKey: LPCWSTR, ulOptions: DWORD, samDesired: REGSAM, - phkResult: *HANDLE, + phkResult: *HKEY, ) callconv(WINAPI) LSTATUS;