From 7bf12b1197823a5b0554dc3f7f67074df5fcafb1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Nov 2022 17:03:14 +0100 Subject: [PATCH] arm: move cpu model table into system/arm.zig Now we can reuse the table between CPU model parsers on Linux and Windows. Use similar parsing structure for Windows as we do for Linux. On Windows, we rely on two entries in the registry per CPU core: `CP 4000` and `Identifier`. Collating the data from the two allows us to recreate most of the `/proc/cpuinfo` data natively on Windows. Additionally, we still allow overriding any CPU features based on the feature data embedded in `SharedUserData`. --- lib/std/target/aarch64.zig | 15 -- lib/std/zig/system/arm.zig | 134 +++++++++++++ lib/std/zig/system/linux.zig | 131 +------------ lib/std/zig/system/windows.zig | 339 ++++++++++++++++----------------- 4 files changed, 310 insertions(+), 309 deletions(-) create mode 100644 lib/std/zig/system/arm.zig diff --git a/lib/std/target/aarch64.zig b/lib/std/target/aarch64.zig index 2fd0d337e3..af50c9d890 100644 --- a/lib/std/target/aarch64.zig +++ b/lib/std/target/aarch64.zig @@ -2252,19 +2252,4 @@ pub const cpu = struct { .v8a, }), }; - - pub const microsoft_sq3 = CpuModel{ - .name = "microsoft_sq3", - .llvm_name = "generic", - .features = featureSet(&[_]Feature{ - .aes, - .crc, - .crypto, - .dotprod, - .fp_armv8, - .lse, - .neon, - .sha2, - }), - }; }; diff --git a/lib/std/zig/system/arm.zig b/lib/std/zig/system/arm.zig new file mode 100644 index 0000000000..b6f06206bc --- /dev/null +++ b/lib/std/zig/system/arm.zig @@ -0,0 +1,134 @@ +const std = @import("std"); + +pub const CoreInfo = struct { + architecture: u8 = 0, + implementer: u8 = 0, + variant: u8 = 0, + part: u16 = 0, +}; + +pub const cpu_models = struct { + // Shorthands to simplify the tables below. 
+ const A32 = std.Target.arm.cpu; + const A64 = std.Target.aarch64.cpu; + + const E = struct { + part: u16, + variant: ?u8 = null, // null if matches any variant + m32: ?*const std.Target.Cpu.Model = null, + m64: ?*const std.Target.Cpu.Model = null, + }; + + // implementer = 0x41 + const ARM = [_]E{ + E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, + E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, + E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, + E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, + E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, + E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, + E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, + E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, + E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, + E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, + E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, + E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, + E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, + E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, + E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, + E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, + E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, + E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, + E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, + E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, + E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, + E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, + E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, + E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, + E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, + E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, + 
E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, + E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, + E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, + E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, + E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, + E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, + E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, + E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, + E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, + E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, + // This is a guess based on https://www.notebookcheck.net/Qualcomm-Snapdragon-8cx-Gen-3-Processor-Benchmarks-and-Specs.652916.0.html + E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c }, + E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, + E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, + E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, + E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, + }; + // implementer = 0x42 + const Broadcom = [_]E{ + E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x43 + const Cavium = [_]E{ + E{ .part = 0x0a0, .m64 = &A64.thunderx }, + E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, + E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, + E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, + E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x46 + const Fujitsu = [_]E{ + E{ .part = 0x001, .m64 = &A64.a64fx }, + }; + // implementer = 0x48 + const HiSilicon = [_]E{ + E{ .part = 0xd01, .m64 = &A64.tsv110 }, + }; + // implementer = 0x4e + const Nvidia = [_]E{ + E{ .part = 0x004, .m64 = &A64.carmel }, + }; + // implementer = 0x50 + const Ampere = [_]E{ + E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, + E{ .part = 0x000, .m64 = &A64.xgene1 }, + }; + // implementer = 0x51 + const Qualcomm = [_]E{ + E{ .part = 0x06f, .m32 = &A32.krait }, + 
E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0xc00, .m64 = &A64.falkor }, + E{ .part = 0xc01, .m64 = &A64.saphira }, + }; + + pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const std.Target.Cpu.Model { + const models = switch (core.implementer) { + 0x41 => &ARM, + 0x42 => &Broadcom, + 0x43 => &Cavium, + 0x46 => &Fujitsu, + 0x48 => &HiSilicon, + 0x50 => &Ampere, + 0x51 => &Qualcomm, + else => return null, + }; + + for (models) |model| { + if (model.part == core.part and + (model.variant == null or model.variant.? == core.variant)) + return if (is_64bit) model.m64 else model.m32; + } + + return null; + } +}; diff --git a/lib/std/zig/system/linux.zig b/lib/std/zig/system/linux.zig index e92aacb6ef..63a49c6472 100644 --- a/lib/std/zig/system/linux.zig +++ b/lib/std/zig/system/linux.zig @@ -159,129 +159,7 @@ const ArmCpuinfoImpl = struct { is_really_v6: bool = false, }; - const cpu_models = struct { - // Shorthands to simplify the tables below. 
- const A32 = Target.arm.cpu; - const A64 = Target.aarch64.cpu; - - const E = struct { - part: u16, - variant: ?u8 = null, // null if matches any variant - m32: ?*const Target.Cpu.Model = null, - m64: ?*const Target.Cpu.Model = null, - }; - - // implementer = 0x41 - const ARM = [_]E{ - E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, - E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, - E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, - E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, - E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, - E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, - E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, - E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, - E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, - E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, - E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, - E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, - E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, - E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, - E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, - E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, - E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, - E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, - E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, - E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, - E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, - E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, - E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, - E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, - E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, - E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, - E{ .part = 0xd09, 
.m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, - E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, - E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, - E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, - E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, - E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, - E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, - E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, - E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, - E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, - E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, - E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, - E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, - E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, - }; - // implementer = 0x42 - const Broadcom = [_]E{ - E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x43 - const Cavium = [_]E{ - E{ .part = 0x0a0, .m64 = &A64.thunderx }, - E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, - E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, - E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, - E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x46 - const Fujitsu = [_]E{ - E{ .part = 0x001, .m64 = &A64.a64fx }, - }; - // implementer = 0x48 - const HiSilicon = [_]E{ - E{ .part = 0xd01, .m64 = &A64.tsv110 }, - }; - // implementer = 0x4e - const Nvidia = [_]E{ - E{ .part = 0x004, .m64 = &A64.carmel }, - }; - // implementer = 0x50 - const Ampere = [_]E{ - E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, - E{ .part = 0x000, .m64 = &A64.xgene1 }, - }; - // implementer = 0x51 - const Qualcomm = [_]E{ - E{ .part = 0x06f, .m32 = &A32.krait }, - E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = 
&A64.cortex_a73 }, - E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, - E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0xc00, .m64 = &A64.falkor }, - E{ .part = 0xc01, .m64 = &A64.saphira }, - }; - - fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model { - const models = switch (core.implementer) { - 0x41 => &ARM, - 0x42 => &Broadcom, - 0x43 => &Cavium, - 0x46 => &Fujitsu, - 0x48 => &HiSilicon, - 0x50 => &Ampere, - 0x51 => &Qualcomm, - else => return null, - }; - - for (models) |model| { - if (model.part == core.part and - (model.variant == null or model.variant.? == core.variant)) - return if (is_64bit) model.m64 else model.m32; - } - - return null; - } - }; + const cpu_models = @import("arm.zig").cpu_models; fn addOne(self: *ArmCpuinfoImpl) void { if (self.have_fields == 4 and self.core_no < self.cores.len) { @@ -346,7 +224,12 @@ const ArmCpuinfoImpl = struct { var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined; for (self.cores[0..self.core_no]) |core, i| { - known_models[i] = cpu_models.isKnown(core, is_64bit); + known_models[i] = cpu_models.isKnown(.{ + .architecture = core.architecture, + .implementer = core.implementer, + .variant = core.variant, + .part = core.part, + }, is_64bit); } // XXX We pick the first core on big.LITTLE systems, hopefully the diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 0aa1abd941..f11905873d 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -45,54 +45,6 @@ pub fn detectRuntimeVersion() WindowsVersion { return @intToEnum(WindowsVersion, version); } -const Armv8CpuInfoImpl = struct { - cores: [8]*const Target.Cpu.Model = undefined, - core_no: usize = 0, - - const 
cpu_family_models = .{ - // Family, Model, Revision - .{ 8, "D4C", 0, &Target.aarch64.cpu.microsoft_sq3 }, - }; - - fn parseOne(self: *Armv8CpuInfoImpl, identifier: []const u8) void { - if (mem.indexOf(u8, identifier, "ARMv8") == null) return; // Sanity check - - var family: ?usize = null; - var model: ?[]const u8 = null; - var revision: ?usize = null; - - var tokens = mem.tokenize(u8, identifier, " "); - while (tokens.next()) |token| { - if (mem.eql(u8, token, "Family")) { - const raw = tokens.next() orelse continue; - family = std.fmt.parseInt(usize, raw, 10) catch null; - } - if (mem.eql(u8, token, "Model")) { - model = tokens.next(); - } - if (mem.eql(u8, token, "Revision")) { - const raw = tokens.next() orelse continue; - revision = std.fmt.parseInt(usize, raw, 10) catch null; - } - } - - if (family == null or model == null or revision == null) return; - - inline for (cpu_family_models) |set| { - if (set[0] == family.? and mem.eql(u8, set[1], model.?) and set[2] == revision.?) { - self.cores[self.core_no] = set[3]; - self.core_no += 1; - break; - } - } - } - - fn finalize(self: Armv8CpuInfoImpl) ?*const Target.Cpu.Model { - if (self.core_no != 8) return null; // Implies we have seen a core we don't know much about - return self.cores[0]; - } -}; - // Technically, a registry value can be as long as 1MB. However, MS recommends storing // values larger than 2048 bytes in a file rather than directly in the registry, and since we // are only accessing a system hive \Registry\Machine, we stick to MS guidelines. 
@@ -169,44 +121,16 @@ fn getCpuInfoFromRegistry( else => unreachable, } }; - const default: struct { ptr: *anyopaque, len: u32 } = blk: { - switch (pair.value) { - REG.SZ, - REG.EXPAND_SZ, - REG.MULTI_SZ, - => { - const def = std.unicode.utf8ToUtf16LeStringLiteral("Unknown"); - var buf: [def.len + 1]u16 = undefined; - mem.copy(u16, &buf, def); - buf[def.len] = 0; - break :blk .{ .ptr = &buf, .len = @intCast(u32, (buf.len + 1) * 2) }; - }, - - REG.DWORD, - REG.DWORD_BIG_ENDIAN, - => { - var buf: [4]u8 = [_]u8{0} ** 4; - break :blk .{ .ptr = &buf, .len = 4 }; - }, - - REG.QWORD => { - var buf: [8]u8 = [_]u8{0} ** 8; - break :blk .{ .ptr = &buf, .len = 8 }; - }, - - else => unreachable, - } - }; - const key_name = std.unicode.utf8ToUtf16LeStringLiteral(pair.key); + const key_namee = std.unicode.utf8ToUtf16LeStringLiteral(pair.key); table[i + 1] = .{ .QueryRoutine = null, - .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT, - .Name = @intToPtr([*:0]u16, @ptrToInt(key_name)), + .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = @intToPtr([*:0]u16, @ptrToInt(key_namee)), .EntryContext = ctx, - .DefaultType = pair.value, - .DefaultData = default.ptr, - .DefaultLength = default.len, + .DefaultType = REG.NONE, + .DefaultData = null, + .DefaultLength = 0, }; } @@ -261,102 +185,177 @@ fn getCpuInfoFromRegistry( else => unreachable, }; }, - else => return std.os.windows.unexpectedStatus(res), + else => return error.Unexpected, } } -fn detectCpuModelArm64() !*const Target.Cpu.Model { - // Pull the CPU identifier from the registry. - // Assume max number of cores to be at 8. 
- const max_cpu_count = 8; - const cpu_count = getCpuCount(); - - if (cpu_count > max_cpu_count) return error.TooManyCpus; - - // Parse the models from strings - var parser = Armv8CpuInfoImpl{}; - - var out_buf: [3][max_value_len]u8 = undefined; - - var i: usize = 0; - while (i < cpu_count) : (i += 1) { - try getCpuInfoFromRegistry(i, 3, .{ - .{ .key = "CP 4000", .value = REG.QWORD }, - .{ .key = "Identifier", .value = REG.SZ }, - .{ .key = "VendorIdentifier", .value = REG.SZ }, - }, &out_buf); - - const hex = out_buf[0][0..8]; - const identifier = mem.sliceTo(out_buf[1][0..], 0); - const vendor_identifier = mem.sliceTo(out_buf[2][0..], 0); - std.log.warn("{d} => {x}, {s}, {s}", .{ i, std.fmt.fmtSliceHexLower(hex), identifier, vendor_identifier }); - } - - return parser.finalize() orelse Target.Cpu.Model.generic(.aarch64); -} - -fn detectNativeCpuAndFeaturesArm64() Target.Cpu { - const Feature = Target.aarch64.Feature; - - const model = detectCpuModelArm64() catch Target.Cpu.Model.generic(.aarch64); - - var cpu = Target.Cpu{ - .arch = .aarch64, - .model = model, - .features = model.features, - }; - - // Override any features that are either present or absent - if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.neon)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.neon)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.crc)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.crc)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.crypto)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.crypto)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.lse)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.lse)); - } - - if 
(IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.dotprod)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.dotprod)); - } - - if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) { - cpu.features.addFeature(@enumToInt(Feature.jsconv)); - } else { - cpu.features.removeFeature(@enumToInt(Feature.jsconv)); - } - - return cpu; -} - fn getCpuCount() usize { return std.os.windows.peb().NumberOfProcessors; } -pub fn detectNativeCpuAndFeatures() ?Target.Cpu { - switch (builtin.cpu.arch) { - .aarch64 => return detectNativeCpuAndFeaturesArm64(), - else => |arch| return .{ +const ArmCpuInfoImpl = struct { + cores: [4]CoreInfo = undefined, + core_no: usize = 0, + have_fields: usize = 0, + + const CoreInfo = @import("arm.zig").CoreInfo; + const cpu_models = @import("arm.zig").cpu_models; + + const Data = struct { + cp_4000: []const u8, + identifier: []const u8, + }; + + fn parseDataHook(self: *ArmCpuInfoImpl, data: Data) !void { + const info = &self.cores[self.core_no]; + info.* = .{}; + + // CPU part + info.part = mem.readIntLittle(u16, data.cp_4000[0..2]) >> 4; + self.have_fields += 1; + + // CPU implementer + info.implementer = data.cp_4000[3]; + self.have_fields += 1; + + var tokens = mem.tokenize(u8, data.identifier, " "); + while (tokens.next()) |token| { + if (mem.eql(u8, "Family", token)) { + // CPU architecture + const family = tokens.next() orelse continue; + info.architecture = try std.fmt.parseInt(u8, family, 10); + self.have_fields += 1; + break; + } + } else return; + + self.addOne(); + } + + fn addOne(self: *ArmCpuInfoImpl) void { + if (self.have_fields == 3 and self.core_no < self.cores.len) { + if (self.core_no > 0) { + // Deduplicate the core info. 
+ for (self.cores[0..self.core_no]) |it| { + if (std.meta.eql(it, self.cores[self.core_no])) + return; + } + } + self.core_no += 1; + } + } + + fn finalize(self: ArmCpuInfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu { + if (self.core_no == 0) return null; + + const is_64bit = switch (arch) { + .aarch64, .aarch64_be, .aarch64_32 => true, + else => false, + }; + + var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined; + for (self.cores[0..self.core_no]) |core, i| { + known_models[i] = cpu_models.isKnown(core, is_64bit); + } + + // XXX We pick the first core on big.LITTLE systems, hopefully the + // LITTLE one. + const model = known_models[0] orelse return null; + return Target.Cpu{ .arch = arch, - .model = Target.Cpu.Model.generic(arch), - .features = Target.Cpu.Feature.Set.empty, + .model = model, + .features = model.features, + }; + } +}; + +const ArmCpuInfoParser = CpuInfoParser(ArmCpuInfoImpl); + +fn CpuInfoParser(comptime impl: anytype) type { + return struct { + fn parse(arch: Target.Cpu.Arch) !?Target.Cpu { + var obj: impl = .{}; + var out_buf: [2][max_value_len]u8 = undefined; + + var i: usize = 0; + while (i < getCpuCount()) : (i += 1) { + try getCpuInfoFromRegistry(i, 2, .{ + .{ .key = "CP 4000", .value = REG.QWORD }, + .{ .key = "Identifier", .value = REG.SZ }, + }, &out_buf); + + const cp_4000 = out_buf[0][0..8]; + const identifier = mem.sliceTo(out_buf[1][0..], 0); + + try obj.parseDataHook(.{ + .cp_4000 = cp_4000, + .identifier = identifier, + }); + } + + return obj.finalize(arch); + } + }; +} + +fn genericCpu(comptime arch: Target.Cpu.Arch) Target.Cpu { + return .{ + .arch = arch, + .model = Target.Cpu.Model.generic(arch), + .features = Target.Cpu.Feature.Set.empty, + }; +} + +pub fn detectNativeCpuAndFeatures() ?Target.Cpu { + const current_arch = builtin.cpu.arch; + switch (current_arch) { + .aarch64, .aarch64_be, .aarch64_32 => { + var cpu = cpu: { + var maybe_cpu = ArmCpuInfoParser.parse(current_arch) catch break :cpu 
genericCpu(current_arch); + break :cpu maybe_cpu orelse genericCpu(current_arch); + }; + + const Feature = Target.aarch64.Feature; + + // Override any features that are either present or absent + if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.neon)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.neon)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crc)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crc)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crypto)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crypto)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.lse)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.lse)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.dotprod)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.dotprod)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.jsconv)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.jsconv)); + } + + return cpu; }, + else => {}, } }