// Mirror of https://github.com/ziglang/zig.git, synced 2025-12-07 23:03:08 +00:00.
const std = @import("std");
|
|
const Target = std.Target;
|
|
|
|
/// Identity of a CPU core as decoded from the MIDR ("Main ID") register.
/// See `aarch64.detectNativeCoreInfo` below for the exact bit layout each
/// field is extracted from.
pub const CoreInfo = struct {
    /// MIDR "Architecture" nibble (bits [19:16]); left 0 when the part uses
    /// a different encoding (see the early-out in detectNativeCoreInfo).
    architecture: u8 = 0,
    /// MIDR "Implementer" byte (bits [31:24]), e.g. 0x41 = ARM Ltd.
    implementer: u8 = 0,
    /// Packed revision info: MIDR "Variant" nibble (bits [23:20]) in the high
    /// half, "Revision" nibble (bits [3:0]) in the low half.
    variant: u8 = 0,
    /// MIDR "PartNum" field (bits [15:4]); identifies the core model.
    part: u16 = 0,
};
|
|
|
|
/// Lookup tables mapping MIDR implementer/part numbers to known CPU models,
/// keyed first by implementer byte, then scanned linearly by part number.
pub const cpu_models = struct {
    // Shorthands to simplify the tables below.
    const A32 = Target.arm.cpu;
    const A64 = Target.aarch64.cpu;

    /// One table entry: a MIDR part number (optionally narrowed to one
    /// variant) and the CPU model(s) to report for it.
    const E = struct {
        part: u16,
        variant: ?u8 = null, // null if matches any variant
        /// Model to use for a 32-bit (AArch32) query; null if none known.
        m32: ?*const Target.Cpu.Model = null,
        /// Model to use for a 64-bit (AArch64) query; null if none known.
        m64: ?*const Target.Cpu.Model = null,
    };

    // implementer = 0x41
    const ARM = [_]E{
        E{ .part = 0x926, .m32 = &A32.arm926ej_s },
        E{ .part = 0xb02, .m32 = &A32.mpcore },
        E{ .part = 0xb36, .m32 = &A32.arm1136j_s },
        E{ .part = 0xb56, .m32 = &A32.arm1156t2_s },
        E{ .part = 0xb76, .m32 = &A32.arm1176jz_s },
        E{ .part = 0xc05, .m32 = &A32.cortex_a5 },
        E{ .part = 0xc07, .m32 = &A32.cortex_a7 },
        E{ .part = 0xc08, .m32 = &A32.cortex_a8 },
        E{ .part = 0xc09, .m32 = &A32.cortex_a9 },
        E{ .part = 0xc0d, .m32 = &A32.cortex_a17 },
        E{ .part = 0xc0e, .m32 = &A32.cortex_a17 },
        E{ .part = 0xc0f, .m32 = &A32.cortex_a15 },
        E{ .part = 0xc14, .m32 = &A32.cortex_r4 },
        E{ .part = 0xc15, .m32 = &A32.cortex_r5 },
        E{ .part = 0xc17, .m32 = &A32.cortex_r7 },
        E{ .part = 0xc18, .m32 = &A32.cortex_r8 },
        E{ .part = 0xc20, .m32 = &A32.cortex_m0 },
        E{ .part = 0xc21, .m32 = &A32.cortex_m1 },
        E{ .part = 0xc23, .m32 = &A32.cortex_m3 },
        E{ .part = 0xc24, .m32 = &A32.cortex_m4 },
        E{ .part = 0xc27, .m32 = &A32.cortex_m7 },
        E{ .part = 0xc60, .m32 = &A32.cortex_m0plus },
        E{ .part = 0xd01, .m32 = &A32.cortex_a32 },
        E{ .part = 0xd02, .m64 = &A64.cortex_a34 },
        E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 },
        E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 },
        E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 },
        E{ .part = 0xd06, .m64 = &A64.cortex_a65 },
        E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 },
        E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 },
        E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 },
        E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 },
        E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 },
        E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 },
        E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 },
        E{ .part = 0xd0e, .m32 = &A32.cortex_a76ae, .m64 = &A64.cortex_a76ae },
        E{ .part = 0xd13, .m32 = &A32.cortex_r52 },
        E{ .part = 0xd14, .m64 = &A64.cortex_r82ae },
        E{ .part = 0xd15, .m64 = &A64.cortex_r82 },
        E{ .part = 0xd16, .m32 = &A32.cortex_r52plus },
        E{ .part = 0xd20, .m32 = &A32.cortex_m23 },
        E{ .part = 0xd21, .m32 = &A32.cortex_m33 },
        E{ .part = 0xd40, .m32 = &A32.neoverse_v1, .m64 = &A64.neoverse_v1 },
        E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 },
        E{ .part = 0xd42, .m32 = &A32.cortex_a78ae, .m64 = &A64.cortex_a78ae },
        E{ .part = 0xd43, .m64 = &A64.cortex_a65ae },
        E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 },
        E{ .part = 0xd46, .m64 = &A64.cortex_a510 },
        E{ .part = 0xd47, .m32 = &A32.cortex_a710, .m64 = &A64.cortex_a710 },
        E{ .part = 0xd48, .m64 = &A64.cortex_x2 },
        E{ .part = 0xd49, .m32 = &A32.neoverse_n2, .m64 = &A64.neoverse_n2 },
        E{ .part = 0xd4a, .m64 = &A64.neoverse_e1 },
        E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c },
        E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c },
        E{ .part = 0xd4d, .m64 = &A64.cortex_a715 },
        E{ .part = 0xd4e, .m64 = &A64.cortex_x3 },
        E{ .part = 0xd4f, .m64 = &A64.neoverse_v2 },
        E{ .part = 0xd80, .m64 = &A64.cortex_a520 },
        E{ .part = 0xd81, .m64 = &A64.cortex_a720 },
        E{ .part = 0xd82, .m64 = &A64.cortex_x4 },
        E{ .part = 0xd83, .m64 = &A64.neoverse_v3ae },
        E{ .part = 0xd84, .m64 = &A64.neoverse_v3 },
        E{ .part = 0xd85, .m64 = &A64.cortex_x925 },
        E{ .part = 0xd87, .m64 = &A64.cortex_a725 },
        E{ .part = 0xd88, .m64 = &A64.cortex_a520ae },
        E{ .part = 0xd89, .m64 = &A64.cortex_a720ae },
        E{ .part = 0xd8e, .m64 = &A64.neoverse_n3 },
        E{ .part = 0xd8f, .m64 = &A64.cortex_a320 },
    };
    // implementer = 0x42
    const Broadcom = [_]E{
        E{ .part = 0x516, .m64 = &A64.thunderx2t99 },
    };
    // implementer = 0x43
    const Cavium = [_]E{
        E{ .part = 0x0a0, .m64 = &A64.thunderx },
        E{ .part = 0x0a2, .m64 = &A64.thunderxt81 },
        E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
        E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
        E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
    };
    // implementer = 0x46
    const Fujitsu = [_]E{
        E{ .part = 0x001, .m64 = &A64.a64fx },
    };
    // implementer = 0x48
    const HiSilicon = [_]E{
        E{ .part = 0xd01, .m64 = &A64.tsv110 },
    };
    // implementer = 0x4e
    const Nvidia = [_]E{
        E{ .part = 0x004, .m64 = &A64.carmel },
        E{ .part = 0x010, .m64 = &A64.olympus },
    };
    // implementer = 0x50
    // Note: the variant-specific eMAG entry must come before the wildcard
    // XGene-1 entry for the same part, since isKnown scans in order.
    const Ampere = [_]E{
        E{ .part = 0x000, .variant = 3, .m64 = &A64.emag },
        E{ .part = 0x000, .m64 = &A64.xgene1 },
    };
    // implementer = 0x51
    // NOTE(review): several entries below use aarch64 models in the .m32
    // slot — presumably intentional (reporting the same model for 32-bit
    // queries), but confirm against upstream history.
    const Qualcomm = [_]E{
        E{ .part = 0x001, .m64 = &A64.oryon_1 },
        E{ .part = 0x06f, .m32 = &A32.krait },
        E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo },
        E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo },
        E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo },
        E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
        E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
        E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
        E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
        E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
        E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
        E{ .part = 0xc00, .m64 = &A64.falkor },
        E{ .part = 0xc01, .m64 = &A64.saphira },
    };
    // implementer = 0x61
    const Apple = [_]E{
        E{ .part = 0x022, .m64 = &A64.apple_m1 },
        E{ .part = 0x023, .m64 = &A64.apple_m1 },
        E{ .part = 0x024, .m64 = &A64.apple_m1 },
        E{ .part = 0x025, .m64 = &A64.apple_m1 },
        E{ .part = 0x028, .m64 = &A64.apple_m1 },
        E{ .part = 0x029, .m64 = &A64.apple_m1 },
        E{ .part = 0x032, .m64 = &A64.apple_m2 },
        E{ .part = 0x033, .m64 = &A64.apple_m2 },
        E{ .part = 0x034, .m64 = &A64.apple_m2 },
        E{ .part = 0x035, .m64 = &A64.apple_m2 },
        E{ .part = 0x038, .m64 = &A64.apple_m2 },
        E{ .part = 0x039, .m64 = &A64.apple_m2 },
    };

    /// Looks up `core` in the tables above. Returns the model registered for
    /// the requested bitness, or null when the implementer is unknown, the
    /// part is not listed, or the matching entry has no model for that
    /// bitness. Entries are scanned in declaration order, so for a part with
    /// both a variant-specific and a wildcard entry the specific one wins
    /// only if it is listed first.
    pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model {
        const models = switch (core.implementer) {
            0x41 => &ARM,
            0x42 => &Broadcom,
            0x43 => &Cavium,
            0x46 => &Fujitsu,
            0x48 => &HiSilicon,
            0x4e => &Nvidia,
            0x50 => &Ampere,
            0x51 => &Qualcomm,
            0x61 => &Apple,
            else => return null,
        };

        for (models) |model| {
            if (model.part == core.part and
                (model.variant == null or model.variant.? == core.variant))
                return if (is_64bit) model.m64 else model.m32;
        }

        return null;
    }
};
|
|
|
|
/// Native CPU detection for AArch64 hosts, driven by readouts of the MIDR
/// and the ID_AA64* feature ID registers.
pub const aarch64 = struct {
    /// Sets or clears a single feature bit in `cpu.features`.
    fn setFeature(cpu: *Target.Cpu, feature: Target.aarch64.Feature, enabled: bool) void {
        const idx = @as(Target.Cpu.Feature.Set.Index, @intFromEnum(feature));

        if (enabled) cpu.features.addFeature(idx) else cpu.features.removeFeature(idx);
    }

    /// Extracts the 4-bit field of `input` starting at bit `offset`.
    /// The ID_AA64* registers are laid out as packed 4-bit feature fields.
    inline fn bitField(input: u64, offset: u6) u4 {
        return @as(u4, @truncate(input >> offset));
    }

    /// Input array should consist of readouts from 12 system registers such that:
    /// 0 -> MIDR_EL1
    /// 1 -> ID_AA64PFR0_EL1
    /// 2 -> ID_AA64PFR1_EL1
    /// 3 -> ID_AA64DFR0_EL1
    /// 4 -> ID_AA64DFR1_EL1
    /// 5 -> ID_AA64AFR0_EL1
    /// 6 -> ID_AA64AFR1_EL1
    /// 7 -> ID_AA64ISAR0_EL1
    /// 8 -> ID_AA64ISAR1_EL1
    /// 9 -> ID_AA64MMFR0_EL1
    /// 10 -> ID_AA64MMFR2_EL1
    /// 11 -> ID_AA64MMFR2_EL1
    /// Returns null when the core's implementer/part is not in
    /// `cpu_models`' tables.
    pub fn detectNativeCpuAndFeatures(arch: Target.Cpu.Arch, registers: [12]u64) ?Target.Cpu {
        const info = detectNativeCoreInfo(registers[0]);
        // Always query the 64-bit model: this namespace only handles
        // AArch64 hosts.
        const model = cpu_models.isKnown(info, true) orelse return null;

        var cpu = Target.Cpu{
            .arch = arch,
            .model = model,
            // Start from empty rather than the model's default set; every
            // relevant feature is decided below from the ID registers.
            .features = Target.Cpu.Feature.Set.empty,
        };

        // registers[1..12] drops MIDR_EL1, matching the 11-register layout
        // documented on detectNativeCpuFeatures.
        detectNativeCpuFeatures(&cpu, registers[1..12]);
        addInstructionFusions(&cpu, info);

        return cpu;
    }

    /// Takes readout of MIDR_EL1 register as input.
    fn detectNativeCoreInfo(midr: u64) CoreInfo {
        var info = CoreInfo{
            .implementer = @as(u8, @truncate(midr >> 24)), // bits [31:24]
            .part = @as(u12, @truncate(midr >> 4)), // bits [15:4]
        };

        blk: {
            if (info.implementer == 0x41) {
                // ARM Ltd.
                const special_bits: u4 = @truncate(info.part >> 8);
                if (special_bits == 0x0 or special_bits == 0x7) {
                    // TODO Variant and arch encoded differently.
                    // For these legacy parts, leave variant/architecture 0.
                    break :blk;
                }
            }

            // variant = MIDR Variant nibble (high half) | Revision nibble
            // (low half).
            info.variant |= @as(u8, @intCast(@as(u4, @truncate(midr >> 20)))) << 4;
            info.variant |= @as(u4, @truncate(midr));
            info.architecture = @as(u4, @truncate(midr >> 16));
        }

        return info;
    }

    /// Input array should consist of readouts from 11 system registers such that:
    /// 0 -> ID_AA64PFR0_EL1
    /// 1 -> ID_AA64PFR1_EL1
    /// 2 -> ID_AA64DFR0_EL1
    /// 3 -> ID_AA64DFR1_EL1
    /// 4 -> ID_AA64AFR0_EL1
    /// 5 -> ID_AA64AFR1_EL1
    /// 6 -> ID_AA64ISAR0_EL1
    /// 7 -> ID_AA64ISAR1_EL1
    /// 8 -> ID_AA64MMFR0_EL1
    /// 9 -> ID_AA64MMFR1_EL1
    /// 10 -> ID_AA64MMFR2_EL1
    /// Bit offsets and thresholds follow the field layouts in the Arm
    /// Architecture Reference Manual for each ID register.
    fn detectNativeCpuFeatures(cpu: *Target.Cpu, registers: *const [11]u64) void {
        // ID_AA64PFR0_EL1
        setFeature(cpu, .dit, bitField(registers[0], 48) >= 1);
        setFeature(cpu, .am, bitField(registers[0], 44) >= 1);
        setFeature(cpu, .amvs, bitField(registers[0], 44) >= 2);
        setFeature(cpu, .mpam, bitField(registers[0], 40) >= 1); // MPAM v1.0
        setFeature(cpu, .sel2, bitField(registers[0], 36) >= 1);
        setFeature(cpu, .sve, bitField(registers[0], 32) >= 1);
        setFeature(cpu, .el3, bitField(registers[0], 12) >= 1);
        setFeature(cpu, .ras, bitField(registers[0], 28) >= 1);

        // FP field: 0xF means floating point is not implemented; FP and
        // AdvSIMD fields are expected to agree.
        if (bitField(registers[0], 20) < 0xF) blk: {
            if (bitField(registers[0], 16) != bitField(registers[0], 20)) break :blk; // This should never occur

            setFeature(cpu, .neon, true);
            setFeature(cpu, .fp_armv8, true);
            setFeature(cpu, .fullfp16, bitField(registers[0], 20) > 0);
        }

        // ID_AA64PFR1_EL1
        // MPAM v0.1: only set when the v1.0 field in PFR0 is absent.
        setFeature(cpu, .mpam, bitField(registers[1], 16) > 0 and bitField(registers[0], 40) == 0); // MPAM v0.1
        setFeature(cpu, .mte, bitField(registers[1], 8) >= 1);
        setFeature(cpu, .ssbs, bitField(registers[1], 4) >= 1);
        setFeature(cpu, .bti, bitField(registers[1], 0) >= 1);

        // ID_AA64DFR0_EL1
        setFeature(cpu, .tracev8_4, bitField(registers[2], 40) >= 1);
        setFeature(cpu, .spe, bitField(registers[2], 32) >= 1);
        // 0xF in the PMUVer field means "implementation defined", not a
        // higher version, so exclude it.
        setFeature(cpu, .perfmon, bitField(registers[2], 8) >= 1 and bitField(registers[2], 8) < 0xF);

        // ID_AA64DFR1_EL1 reserved
        // ID_AA64AFR0_EL1 reserved / implementation defined
        // ID_AA64AFR1_EL1 reserved

        // ID_AA64ISAR0_EL1
        setFeature(cpu, .rand, bitField(registers[6], 60) >= 1);
        setFeature(cpu, .tlb_rmi, bitField(registers[6], 56) >= 1);
        setFeature(cpu, .flagm, bitField(registers[6], 52) >= 1);
        setFeature(cpu, .fp16fml, bitField(registers[6], 48) >= 1);
        setFeature(cpu, .dotprod, bitField(registers[6], 44) >= 1);
        setFeature(cpu, .sm4, bitField(registers[6], 40) >= 1 and bitField(registers[6], 36) >= 1);
        setFeature(cpu, .sha3, bitField(registers[6], 32) >= 1 and bitField(registers[6], 12) >= 2);
        setFeature(cpu, .rdm, bitField(registers[6], 28) >= 1);
        setFeature(cpu, .lse, bitField(registers[6], 20) >= 1);
        setFeature(cpu, .crc, bitField(registers[6], 16) >= 1);
        setFeature(cpu, .sha2, bitField(registers[6], 12) >= 1 and bitField(registers[6], 8) >= 1);
        setFeature(cpu, .aes, bitField(registers[6], 4) >= 1);

        // ID_AA64ISAR1_EL1
        setFeature(cpu, .i8mm, bitField(registers[7], 52) >= 1);
        setFeature(cpu, .bf16, bitField(registers[7], 44) >= 1);
        setFeature(cpu, .predres, bitField(registers[7], 40) >= 1);
        setFeature(cpu, .sb, bitField(registers[7], 36) >= 1);
        setFeature(cpu, .fptoint, bitField(registers[7], 32) >= 1);
        setFeature(cpu, .rcpc, bitField(registers[7], 20) >= 1);
        setFeature(cpu, .rcpc_immo, bitField(registers[7], 20) >= 2);
        setFeature(cpu, .complxnum, bitField(registers[7], 16) >= 1);
        setFeature(cpu, .jsconv, bitField(registers[7], 12) >= 1);
        // Pointer auth: present if either the architected (APA) or the
        // implementation-defined (API) field is nonzero.
        setFeature(cpu, .pauth, bitField(registers[7], 8) >= 1 or bitField(registers[7], 4) >= 1);
        setFeature(cpu, .ccpp, bitField(registers[7], 0) >= 1);
        setFeature(cpu, .ccdp, bitField(registers[7], 0) >= 2);

        // ID_AA64MMFR0_EL1
        setFeature(cpu, .ecv, bitField(registers[8], 60) >= 1);
        setFeature(cpu, .fgt, bitField(registers[8], 56) >= 1);

        // ID_AA64MMFR1_EL1
        setFeature(cpu, .pan, bitField(registers[9], 20) >= 1);
        setFeature(cpu, .pan_rwv, bitField(registers[9], 20) >= 2);
        setFeature(cpu, .lor, bitField(registers[9], 16) >= 1);
        setFeature(cpu, .vh, bitField(registers[9], 8) >= 1);
        setFeature(cpu, .contextidr_el2, bitField(registers[9], 8) >= 1);

        // ID_AA64MMFR2_EL1
        setFeature(cpu, .nv, bitField(registers[10], 24) >= 1);
        setFeature(cpu, .ccidx, bitField(registers[10], 20) >= 1);
        setFeature(cpu, .uaops, bitField(registers[10], 4) >= 1);
    }

    /// Enables instruction-fusion features for specific cores that the ID
    /// registers cannot express (known only from vendor optimization guides).
    fn addInstructionFusions(cpu: *Target.Cpu, info: CoreInfo) void {
        switch (info.implementer) {
            // ARM Ltd. parts 0xd4b/0xd4c (Cortex-A78C / Cortex-X1C per the
            // tables in cpu_models above).
            0x41 => switch (info.part) {
                0xd4b, 0xd4c => {
                    // According to A78C/X1C Core Software Optimization Guide, CPU fuses certain instructions.
                    setFeature(cpu, .cmp_bcc_fusion, true);
                    setFeature(cpu, .fuse_aes, true);
                },
                else => {},
            },
            else => {},
        }
    }
};
|