Merge pull request #13701 from ziglang/arm-win-more-features

Improve aarch64 feature detection based on the readouts from privileged system registers
This commit is contained in:
Jakub Konka 2022-11-30 00:20:38 +01:00 committed by Andrew Kelley
parent 0313426ce8
commit fa30458e84
2 changed files with 275 additions and 181 deletions

View File

@ -1,4 +1,5 @@
const std = @import("std");
const Target = std.Target;
pub const CoreInfo = struct {
architecture: u8 = 0,
@ -9,14 +10,14 @@ pub const CoreInfo = struct {
pub const cpu_models = struct {
// Shorthands to simplify the tables below.
const A32 = std.Target.arm.cpu;
const A64 = std.Target.aarch64.cpu;
const A32 = Target.arm.cpu;
const A64 = Target.aarch64.cpu;
const E = struct {
part: u16,
variant: ?u8 = null, // null if matches any variant
m32: ?*const std.Target.Cpu.Model = null,
m64: ?*const std.Target.Cpu.Model = null,
m32: ?*const Target.Cpu.Model = null,
m64: ?*const Target.Cpu.Model = null,
};
// implementer = 0x41
@ -59,7 +60,6 @@ pub const cpu_models = struct {
E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null },
E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 },
E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c },
// This is a guess based on https://www.notebookcheck.net/Qualcomm-Snapdragon-8cx-Gen-3-Processor-Benchmarks-and-Specs.652916.0.html
E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c },
E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 },
E{ .part = 0xd02, .m64 = &A64.cortex_a34 },
@ -111,7 +111,7 @@ pub const cpu_models = struct {
E{ .part = 0xc01, .m64 = &A64.saphira },
};
pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const std.Target.Cpu.Model {
pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model {
const models = switch (core.implementer) {
0x41 => &ARM,
0x42 => &Broadcom,
@ -132,3 +132,174 @@ pub const cpu_models = struct {
return null;
}
};
/// Detection of the native AArch64 CPU model and feature set from raw readouts
/// of the CPU identification system registers.
pub const aarch64 = struct {
    /// Adds `feature` to `cpu.features` when `enabled` is true, removes it otherwise.
    fn setFeature(cpu: *Target.Cpu, feature: Target.aarch64.Feature, enabled: bool) void {
        const idx = @as(Target.Cpu.Feature.Set.Index, @enumToInt(feature));
        if (enabled) cpu.features.addFeature(idx) else cpu.features.removeFeature(idx);
    }

    /// Returns the 4-bit field of `input` whose least significant bit sits at `offset`.
    inline fn bitField(input: u64, offset: u6) u4 {
        return @truncate(u4, input >> offset);
    }

    /// Input array should consist of readouts from 12 system registers such that:
    /// 0  -> MIDR_EL1
    /// 1  -> ID_AA64PFR0_EL1
    /// 2  -> ID_AA64PFR1_EL1
    /// 3  -> ID_AA64DFR0_EL1
    /// 4  -> ID_AA64DFR1_EL1
    /// 5  -> ID_AA64AFR0_EL1
    /// 6  -> ID_AA64AFR1_EL1
    /// 7  -> ID_AA64ISAR0_EL1
    /// 8  -> ID_AA64ISAR1_EL1
    /// 9  -> ID_AA64MMFR0_EL1
    /// 10 -> ID_AA64MMFR1_EL1
    /// 11 -> ID_AA64MMFR2_EL1
    ///
    /// Returns null when the core described by MIDR_EL1 is not a known 64-bit model.
    /// The returned feature set is built from the register readouts (plus the
    /// per-core instruction fusions); it does not start from the model's defaults.
    pub fn detectNativeCpuAndFeatures(arch: Target.Cpu.Arch, registers: [12]u64) ?Target.Cpu {
        const info = detectNativeCoreInfo(registers[0]);
        const model = cpu_models.isKnown(info, true) orelse return null;

        var cpu = Target.Cpu{
            .arch = arch,
            .model = model,
            .features = Target.Cpu.Feature.Set.empty,
        };

        // Everything after MIDR_EL1 is a feature ID register.
        detectNativeCpuFeatures(&cpu, registers[1..12]);
        addInstructionFusions(&cpu, info);

        return cpu;
    }

    /// Takes readout of MIDR_EL1 register as input.
    /// Implementer is taken from bits [31:24] and part number from bits [15:4].
    /// Unless the early-out below applies, `variant` is composed of bits [23:20]
    /// (high nibble) and bits [3:0] (low nibble), and `architecture` is bits [19:16].
    fn detectNativeCoreInfo(midr: u64) CoreInfo {
        var info = CoreInfo{
            .implementer = @truncate(u8, midr >> 24),
            .part = @truncate(u12, midr >> 4),
        };

        blk: {
            if (info.implementer == 0x41) {
                // ARM Ltd.
                const special_bits = @truncate(u4, info.part >> 8);
                if (special_bits == 0x0 or special_bits == 0x7) {
                    // TODO Variant and arch encoded differently.
                    break :blk;
                }
            }

            info.variant |= @intCast(u8, @truncate(u4, midr >> 20)) << 4;
            info.variant |= @truncate(u4, midr);
            info.architecture = @truncate(u4, midr >> 16);
        }

        return info;
    }

    /// Input array should consist of readouts from 11 system registers such that:
    /// 0  -> ID_AA64PFR0_EL1
    /// 1  -> ID_AA64PFR1_EL1
    /// 2  -> ID_AA64DFR0_EL1
    /// 3  -> ID_AA64DFR1_EL1
    /// 4  -> ID_AA64AFR0_EL1
    /// 5  -> ID_AA64AFR1_EL1
    /// 6  -> ID_AA64ISAR0_EL1
    /// 7  -> ID_AA64ISAR1_EL1
    /// 8  -> ID_AA64MMFR0_EL1
    /// 9  -> ID_AA64MMFR1_EL1
    /// 10 -> ID_AA64MMFR2_EL1
    ///
    /// Every feature listed below is explicitly added or removed, so the result
    /// for those features does not depend on the incoming contents of `cpu.features`.
    fn detectNativeCpuFeatures(cpu: *Target.Cpu, registers: *const [11]u64) void {
        // ID_AA64PFR0_EL1
        setFeature(cpu, .dit, bitField(registers[0], 48) >= 1);
        setFeature(cpu, .am, bitField(registers[0], 44) >= 1);
        setFeature(cpu, .amvs, bitField(registers[0], 44) >= 2);
        setFeature(cpu, .sel2, bitField(registers[0], 36) >= 1);
        setFeature(cpu, .sve, bitField(registers[0], 32) >= 1);
        setFeature(cpu, .el3, bitField(registers[0], 12) >= 1);
        setFeature(cpu, .ras, bitField(registers[0], 28) >= 1);

        if (bitField(registers[0], 20) < 0xF) blk: {
            if (bitField(registers[0], 16) != bitField(registers[0], 20)) break :blk; // This should never occur

            setFeature(cpu, .neon, true);
            setFeature(cpu, .fp_armv8, true);
            setFeature(cpu, .fullfp16, bitField(registers[0], 20) > 0);
        }

        // ID_AA64PFR0_EL1 + ID_AA64PFR1_EL1
        // MPAM is reported through two fields: the one in ID_AA64PFR0_EL1 (MPAM v1.0)
        // and the one in ID_AA64PFR1_EL1 (MPAM v0.1). They are combined into a single
        // assignment on purpose: `setFeature` removes the feature when passed `false`,
        // so setting `.mpam` twice would let the second call undo the first.
        const mpam_v1_0 = bitField(registers[0], 40) >= 1;
        const mpam_v0_1 = bitField(registers[1], 16) >= 1;
        setFeature(cpu, .mpam, mpam_v1_0 or mpam_v0_1);

        // ID_AA64PFR1_EL1
        setFeature(cpu, .mte, bitField(registers[1], 8) >= 1);
        setFeature(cpu, .ssbs, bitField(registers[1], 4) >= 1);
        setFeature(cpu, .bti, bitField(registers[1], 0) >= 1);

        // ID_AA64DFR0_EL1
        setFeature(cpu, .tracev8_4, bitField(registers[2], 40) >= 1);
        setFeature(cpu, .spe, bitField(registers[2], 32) >= 1);
        setFeature(cpu, .perfmon, bitField(registers[2], 8) >= 1 and bitField(registers[2], 8) < 0xF);

        // ID_AA64DFR1_EL1 reserved
        // ID_AA64AFR0_EL1 reserved / implementation defined
        // ID_AA64AFR1_EL1 reserved

        // ID_AA64ISAR0_EL1
        setFeature(cpu, .rand, bitField(registers[6], 60) >= 1);
        setFeature(cpu, .tlb_rmi, bitField(registers[6], 56) >= 1);
        setFeature(cpu, .flagm, bitField(registers[6], 52) >= 1);
        setFeature(cpu, .fp16fml, bitField(registers[6], 48) >= 1);
        setFeature(cpu, .dotprod, bitField(registers[6], 44) >= 1);
        setFeature(cpu, .sm4, bitField(registers[6], 40) >= 1 and bitField(registers[6], 36) >= 1);
        setFeature(cpu, .sha3, bitField(registers[6], 32) >= 1 and bitField(registers[6], 12) >= 2);
        setFeature(cpu, .rdm, bitField(registers[6], 28) >= 1);
        setFeature(cpu, .lse, bitField(registers[6], 20) >= 1);
        setFeature(cpu, .crc, bitField(registers[6], 16) >= 1);
        setFeature(cpu, .sha2, bitField(registers[6], 12) >= 1 and bitField(registers[6], 8) >= 1);
        setFeature(cpu, .aes, bitField(registers[6], 4) >= 1);

        // ID_AA64ISAR1_EL1
        setFeature(cpu, .i8mm, bitField(registers[7], 52) >= 1);
        setFeature(cpu, .bf16, bitField(registers[7], 44) >= 1);
        setFeature(cpu, .predres, bitField(registers[7], 40) >= 1);
        setFeature(cpu, .sb, bitField(registers[7], 36) >= 1);
        setFeature(cpu, .fptoint, bitField(registers[7], 32) >= 1);
        setFeature(cpu, .rcpc, bitField(registers[7], 20) >= 1);
        setFeature(cpu, .rcpc_immo, bitField(registers[7], 20) >= 2);
        setFeature(cpu, .complxnum, bitField(registers[7], 16) >= 1);
        setFeature(cpu, .jsconv, bitField(registers[7], 12) >= 1);
        setFeature(cpu, .pauth, bitField(registers[7], 8) >= 1 or bitField(registers[7], 4) >= 1);
        setFeature(cpu, .ccpp, bitField(registers[7], 0) >= 1);
        setFeature(cpu, .ccdp, bitField(registers[7], 0) >= 2);

        // ID_AA64MMFR0_EL1
        setFeature(cpu, .ecv, bitField(registers[8], 60) >= 1);
        setFeature(cpu, .fgt, bitField(registers[8], 56) >= 1);

        // ID_AA64MMFR1_EL1
        setFeature(cpu, .pan, bitField(registers[9], 20) >= 1);
        setFeature(cpu, .pan_rwv, bitField(registers[9], 20) >= 2);
        setFeature(cpu, .lor, bitField(registers[9], 16) >= 1);
        setFeature(cpu, .vh, bitField(registers[9], 8) >= 1);
        setFeature(cpu, .contextidr_el2, bitField(registers[9], 8) >= 1);

        // ID_AA64MMFR2_EL1
        setFeature(cpu, .nv, bitField(registers[10], 24) >= 1);
        setFeature(cpu, .ccidx, bitField(registers[10], 20) >= 1);
        setFeature(cpu, .uaops, bitField(registers[10], 4) >= 1);
    }

    /// Enables instruction-fusion features that are known per core model rather
    /// than advertised through the ID registers.
    fn addInstructionFusions(cpu: *Target.Cpu, info: CoreInfo) void {
        switch (info.implementer) {
            0x41 => switch (info.part) {
                0xd4b, 0xd4c => {
                    // According to A78C/X1C Core Software Optimization Guide, CPU fuses certain instructions.
                    setFeature(cpu, .cmp_bcc_fusion, true);
                    setFeature(cpu, .fuse_aes, true);
                },
                else => {},
            },
            else => {},
        }
    }
};

View File

@ -51,23 +51,22 @@ pub fn detectRuntimeVersion() WindowsVersion {
// https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits
const max_value_len = 2048;
/// Describes one registry value to query: its name and the type it is expected to have.
const RegistryPair = struct {
/// Name of the registry value, as UTF-8 (converted to UTF-16 before the query is issued).
key: []const u8,
/// Expected registry value type, i.e. one of the `REG.*` constants.
value: std.os.windows.ULONG,
};
fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
const ArgsType = @TypeOf(args);
const args_type_info = @typeInfo(ArgsType);
if (args_type_info != .Struct) {
@compileError("expected tuple or struct argument, found " ++ @typeName(ArgsType));
}
const fields_info = args_type_info.Struct.fields;
fn getCpuInfoFromRegistry(
core: usize,
comptime pairs_num: comptime_int,
comptime pairs: [pairs_num]RegistryPair,
out_buf: *[pairs_num][max_value_len]u8,
) !void {
// Originally, I wanted to issue a single call with a more complex table structure such that we
// would sequentially visit each CPU#d subkey in the registry and pull the value of interest into
// a buffer, however, NT seems to be expecting a single buffer per each table meaning we would
// end up pulling only the last CPU core info, overwriting everything else.
// If anyone can come up with a solution to this, please do!
const table_size = 1 + pairs.len;
const table_size = 1 + fields_info.len;
var table: [table_size + 1]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined;
const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor");
@ -90,9 +89,9 @@ fn getCpuInfoFromRegistry(
.DefaultLength = 0,
};
inline for (pairs) |pair, i| {
inline for (fields_info) |field, i| {
const ctx: *anyopaque = blk: {
switch (pair.value) {
switch (@field(args, field.name).value_type) {
REG.SZ,
REG.EXPAND_SZ,
REG.MULTI_SZ,
@ -121,12 +120,15 @@ fn getCpuInfoFromRegistry(
else => unreachable,
}
};
const key_namee = std.unicode.utf8ToUtf16LeStringLiteral(pair.key);
var key_buf: [max_value_len / 2 + 1]u16 = undefined;
const key_len = try std.unicode.utf8ToUtf16Le(&key_buf, @field(args, field.name).key);
key_buf[key_len] = 0;
table[i + 1] = .{
.QueryRoutine = null,
.Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
.Name = @intToPtr([*:0]u16, @ptrToInt(key_namee)),
.Name = key_buf[0..key_len :0],
.EntryContext = ctx,
.DefaultType = REG.NONE,
.DefaultData = null,
@ -154,16 +156,15 @@ fn getCpuInfoFromRegistry(
);
switch (res) {
.SUCCESS => {
inline for (pairs) |pair, i| switch (pair.value) {
REG.NONE => unreachable,
inline for (fields_info) |field, i| switch (@field(args, field.name).value_type) {
REG.SZ,
REG.EXPAND_SZ,
REG.MULTI_SZ,
=> {
var buf = @field(args, field.name).value_buf;
const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[i + 1].EntryContext);
const len = try std.unicode.utf16leToUtf8(out_buf[i][0..], entry.Buffer[0 .. entry.Length / 2]);
out_buf[i][len] = 0;
const len = try std.unicode.utf16leToUtf8(buf, entry.Buffer[0 .. entry.Length / 2]);
buf[len] = 0;
},
REG.DWORD,
@ -171,12 +172,12 @@ fn getCpuInfoFromRegistry(
REG.QWORD,
=> {
const entry = @ptrCast([*]align(1) const u8, table[i + 1].EntryContext);
switch (pair.value) {
switch (@field(args, field.name).value_type) {
REG.DWORD, REG.DWORD_BIG_ENDIAN => {
mem.copy(u8, out_buf[i][0..4], entry[0..4]);
mem.copy(u8, @field(args, field.name).value_buf[0..4], entry[0..4]);
},
REG.QWORD => {
mem.copy(u8, out_buf[i][0..8], entry[0..8]);
mem.copy(u8, @field(args, field.name).value_buf[0..8], entry[0..8]);
},
else => unreachable,
}
@ -189,173 +190,95 @@ fn getCpuInfoFromRegistry(
}
}
/// Switches a single CPU feature on or off in `cpu.features`.
/// `Feature` is the architecture-specific feature enum that `feature` belongs to.
fn setFeature(comptime Feature: type, cpu: *Target.Cpu, feature: Feature, enabled: bool) void {
    const index = @as(Target.Cpu.Feature.Set.Index, @enumToInt(feature));
    if (!enabled) {
        cpu.features.removeFeature(index);
        return;
    }
    cpu.features.addFeature(index);
}
/// Returns the processor count stored in the current process's PEB.
fn getCpuCount() usize {
    const peb = std.os.windows.peb();
    return peb.NumberOfProcessors;
}
/// Accumulates per-core identification data read from the registry and
/// resolves it to a known ARM CPU model.
const ArmCpuInfoImpl = struct {
    /// Distinct core descriptions recorded so far; only `cores[0..core_no]` is valid.
    cores: [4]CoreInfo = undefined,
    core_no: usize = 0,
    /// Number of fields successfully parsed for the core currently being processed.
    have_fields: usize = 0,

    const CoreInfo = @import("arm.zig").CoreInfo;
    const cpu_models = @import("arm.zig").cpu_models;

    /// Raw registry data for a single core.
    const Data = struct {
        cp_4000: []const u8,
        identifier: []const u8,
    };

    /// Parses the registry data of one core into the next free slot of `cores`
    /// and commits it via `addOne` if all three fields were found.
    /// Returns an error if the number following "Family" cannot be parsed.
    fn parseDataHook(self: *ArmCpuInfoImpl, data: Data) !void {
        // Every slot is taken: there is nowhere to record another core.
        if (self.core_no >= self.cores.len) return;

        // The field counter is per core. Without this reset it keeps growing across
        // calls and the `have_fields == 3` check in `addOne` only ever holds for the
        // very first core.
        self.have_fields = 0;

        const info = &self.cores[self.core_no];
        info.* = .{};

        // CPU part
        info.part = mem.readIntLittle(u16, data.cp_4000[0..2]) >> 4;
        self.have_fields += 1;

        // CPU implementer
        info.implementer = data.cp_4000[3];
        self.have_fields += 1;

        var tokens = mem.tokenize(u8, data.identifier, " ");
        while (tokens.next()) |token| {
            if (mem.eql(u8, "Family", token)) {
                // CPU architecture
                const family = tokens.next() orelse continue;
                info.architecture = try std.fmt.parseInt(u8, family, 10);
                self.have_fields += 1;
                break;
            }
        } else return; // no "Family <n>" pair found: leave the slot uncommitted

        self.addOne();
    }

    /// Commits the slot filled by `parseDataHook` unless it is incomplete or
    /// duplicates an already recorded core.
    fn addOne(self: *ArmCpuInfoImpl) void {
        if (self.have_fields == 3 and self.core_no < self.cores.len) {
            if (self.core_no > 0) {
                // Deduplicate the core info.
                for (self.cores[0..self.core_no]) |it| {
                    if (std.meta.eql(it, self.cores[self.core_no]))
                        return;
                }
            }
            self.core_no += 1;
        }
    }

    /// Returns the CPU for the first recorded core, using that model's default
    /// feature set, or null if no core was recorded or its model is unknown.
    fn finalize(self: ArmCpuInfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu {
        if (self.core_no == 0) return null;

        const is_64bit = switch (arch) {
            .aarch64, .aarch64_be, .aarch64_32 => true,
            else => false,
        };

        // XXX We pick the first core on big.LITTLE systems, hopefully the
        // LITTLE one.
        const model = cpu_models.isKnown(self.cores[0], is_64bit) orelse return null;
        return Target.Cpu{
            .arch = arch,
            .model = model,
            .features = model.features,
        };
    }
};
const ArmCpuInfoParser = CpuInfoParser(ArmCpuInfoImpl);
/// Builds a parser type around `impl`. Its `parse` function reads the "CP 4000"
/// and "Identifier" registry values of every core, feeds them to
/// `impl.parseDataHook`, and returns whatever `impl.finalize` produces.
fn CpuInfoParser(comptime impl: anytype) type {
    return struct {
        fn parse(arch: Target.Cpu.Arch) !?Target.Cpu {
            var state: impl = .{};
            // One output buffer per queried registry value.
            var values: [2][max_value_len]u8 = undefined;

            var core: usize = 0;
            while (core < getCpuCount()) : (core += 1) {
                try getCpuInfoFromRegistry(core, 2, .{
                    .{ .key = "CP 4000", .value = REG.QWORD },
                    .{ .key = "Identifier", .value = REG.SZ },
                }, &values);

                try state.parseDataHook(.{
                    // QWORD value: exactly 8 bytes.
                    .cp_4000 = values[0][0..8],
                    // String value: NUL-terminated inside the buffer.
                    .identifier = mem.sliceTo(values[1][0..], 0),
                });
            }

            return state.finalize(arch);
        }
    };
}
fn genericCpu(comptime arch: Target.Cpu.Arch) Target.Cpu {
return .{
/// Fallback for when the fine-grained detection of CPU features via the Win
/// registry fails: start from the generic CPU model for `arch` and override the
/// feature set using `SharedUserData` contents.
/// This is effectively what LLVM does for all ARM chips on Windows.
fn genericCpuAndNativeFeatures(arch: Target.Cpu.Arch) Target.Cpu {
    var result = Target.Cpu{
        .arch = arch,
        .model = Target.Cpu.Model.generic(arch),
        .features = Target.Cpu.Feature.Set.empty,
    };

    const is_aarch64 = switch (arch) {
        .aarch64, .aarch64_be, .aarch64_32 => true,
        else => false,
    };
    // Only AArch64 is probed; any other architecture keeps the empty feature set.
    if (!is_aarch64) return result;

    const Feature = Target.aarch64.Feature;

    // Override any features that are either present or absent
    setFeature(Feature, &result, .neon, IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE));
    setFeature(Feature, &result, .crc, IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE));
    setFeature(Feature, &result, .crypto, IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE));
    setFeature(Feature, &result, .lse, IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE));
    setFeature(Feature, &result, .dotprod, IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE));
    setFeature(Feature, &result, .jsconv, IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE));

    return result;
}
pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
const current_arch = builtin.cpu.arch;
switch (current_arch) {
.aarch64, .aarch64_be, .aarch64_32 => {
var cpu = cpu: {
var maybe_cpu = ArmCpuInfoParser.parse(current_arch) catch break :cpu genericCpu(current_arch);
break :cpu maybe_cpu orelse genericCpu(current_arch);
};
const cpu: ?Target.Cpu = switch (current_arch) {
.aarch64, .aarch64_be, .aarch64_32 => blk: {
var cores: [128]Target.Cpu = undefined;
const core_count = getCpuCount();
const Feature = Target.aarch64.Feature;
if (core_count > cores.len) break :blk null;
// Override any features that are either present or absent
if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) {
cpu.features.addFeature(@enumToInt(Feature.neon));
} else {
cpu.features.removeFeature(@enumToInt(Feature.neon));
var i: usize = 0;
while (i < core_count) : (i += 1) {
// Backing datastore
var registers: [12]u64 = undefined;
// Registry key to system ID register mapping
// CP 4000 -> MIDR_EL1
// CP 4020 -> ID_AA64PFR0_EL1
// CP 4021 -> ID_AA64PFR1_EL1
// CP 4028 -> ID_AA64DFR0_EL1
// CP 4029 -> ID_AA64DFR1_EL1
// CP 402C -> ID_AA64AFR0_EL1
// CP 402D -> ID_AA64AFR1_EL1
// CP 4030 -> ID_AA64ISAR0_EL1
// CP 4031 -> ID_AA64ISAR1_EL1
// CP 4038 -> ID_AA64MMFR0_EL1
// CP 4039 -> ID_AA64MMFR1_EL1
// CP 403A -> ID_AA64MMFR2_EL1
getCpuInfoFromRegistry(i, .{
.{ .key = "CP 4000", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[0]) },
.{ .key = "CP 4020", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[1]) },
.{ .key = "CP 4021", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[2]) },
.{ .key = "CP 4028", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[3]) },
.{ .key = "CP 4029", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[4]) },
.{ .key = "CP 402C", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[5]) },
.{ .key = "CP 402D", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[6]) },
.{ .key = "CP 4030", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[7]) },
.{ .key = "CP 4031", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[8]) },
.{ .key = "CP 4038", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[9]) },
.{ .key = "CP 4039", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[10]) },
.{ .key = "CP 403A", .value_type = REG.QWORD, .value_buf = @ptrCast(*[8]u8, &registers[11]) },
}) catch break :blk null;
cores[i] = @import("arm.zig").aarch64.detectNativeCpuAndFeatures(current_arch, registers) orelse
break :blk null;
}
if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
cpu.features.addFeature(@enumToInt(Feature.crc));
} else {
cpu.features.removeFeature(@enumToInt(Feature.crc));
}
if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
cpu.features.addFeature(@enumToInt(Feature.crypto));
} else {
cpu.features.removeFeature(@enumToInt(Feature.crypto));
}
if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) {
cpu.features.addFeature(@enumToInt(Feature.lse));
} else {
cpu.features.removeFeature(@enumToInt(Feature.lse));
}
if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
cpu.features.addFeature(@enumToInt(Feature.dotprod));
} else {
cpu.features.removeFeature(@enumToInt(Feature.dotprod));
}
if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) {
cpu.features.addFeature(@enumToInt(Feature.jsconv));
} else {
cpu.features.removeFeature(@enumToInt(Feature.jsconv));
}
return cpu;
// Pick the first core, usually LITTLE in big.LITTLE architecture.
break :blk cores[0];
},
else => {},
}
else => null,
};
return cpu orelse genericCpuAndNativeFeatures(current_arch);
}