zig/src/codegen/x86_64/abi.zig
2025-10-02 15:59:51 -04:00

610 lines
24 KiB
Zig
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/// Classification of one slot of a value for the purposes of parameter passing and
/// value return. The first few tags mirror the System V x86-64 psABI classes; the
/// remaining ones are backend-specific refinements.
pub const Class = enum {
    /// INTEGER: integral types that fit into one of the general purpose registers.
    integer,
    /// SSE: types that fit into a vector register.
    sse,
    /// SSEUP: types that fit into a vector register and can be passed and returned
    /// in the upper bytes of it.
    sseup,
    /// X87, X87UP: types that will be returned via the x87 FPU.
    x87,
    /// The 15-bit exponent, 1-bit sign, and 6 bytes of padding of an `f80`.
    x87up,
    /// NO_CLASS: used as initializer in the algorithms, and for padding and empty
    /// structures and unions.
    none,
    /// MEMORY: types that will be passed and returned in memory via the stack.
    memory,
    /// Win64 passes 128-bit integers as `Class.memory` but returns them as `Class.sse`.
    win_i128,
    /// A `Class.sse` containing one `f32`.
    float,
    /// A `Class.sse` containing two `f32`s.
    float_combine,
    /// Clang passes each vector element in a separate `Class.integer`, but returns as `Class.memory`.
    integer_per_element,

    // Ready-made eight-slot classifications. Used slots come first and the
    // remainder is padded with `.none`.

    pub const one_integer: [8]Class = [_]Class{.integer} ++ [_]Class{.none} ** 7;
    pub const two_integers: [8]Class = [_]Class{.integer} ** 2 ++ [_]Class{.none} ** 6;
    pub const three_integers: [8]Class = [_]Class{.integer} ** 3 ++ [_]Class{.none} ** 5;
    pub const four_integers: [8]Class = [_]Class{.integer} ** 4 ++ [_]Class{.none} ** 4;
    /// A single `.integer_per_element` marker followed by padding.
    pub const len_integers: [8]Class = [_]Class{.integer_per_element} ++ [_]Class{.none} ** 7;

    /// `f16` shares the classification of `f64` (a lone `.sse` slot).
    pub const @"f16" = @"f64";
    pub const @"f32": [8]Class = [_]Class{.float} ++ [_]Class{.none} ** 7;
    pub const @"f64": [8]Class = [_]Class{.sse} ++ [_]Class{.none} ** 7;
    pub const @"f80": [8]Class = [_]Class{ .x87, .x87up } ++ [_]Class{.none} ** 6;
    pub const @"f128": [8]Class = [_]Class{ .sse, .sseup } ++ [_]Class{.none} ** 6;
    /// COMPLEX_X87: This class consists of types that will be returned via the x87
    /// FPU.
    pub const complex_x87: [8]Class = [_]Class{ .x87, .x87up } ** 2 ++ [_]Class{.none} ** 4;
    /// A single `.memory` slot followed by padding.
    pub const stack: [8]Class = [_]Class{.memory} ++ [_]Class{.none} ** 7;

    /// Returns whether `class` is one of the two x87 classes (`.x87` or `.x87up`).
    pub fn isX87(class: Class) bool {
        return class == .x87 or class == .x87up;
    }

    /// Merge the class of a field (`next_class`) into the class already recorded
    /// for its slot (`prev_class`). The rules are checked in the order given by
    /// the psABI text quoted below; the first one that applies wins.
    fn combineSystemV(prev_class: Class, next_class: Class) Class {
        // "If both classes are equal, this is the resulting class."
        // Two `.float` halves sharing a slot are recorded as `.float_combine`.
        if (prev_class == next_class) return switch (prev_class) {
            .float => .float_combine,
            else => prev_class,
        };

        // "If one of the classes is NO_CLASS, the resulting class
        // is the other class."
        // Only the previous class is tested for `.none` here.
        if (prev_class == .none) return next_class;

        // "If one of the classes is MEMORY, the result is the MEMORY class."
        const any_memory = prev_class == .memory or next_class == .memory;
        if (any_memory) return .memory;

        // "If one of the classes is INTEGER, the result is the INTEGER."
        const any_integer = prev_class == .integer or next_class == .integer;
        if (any_integer) return .integer;

        // "If one of the classes is X87, X87UP, COMPLEX_X87 class,
        // MEMORY is used as class."
        // "Otherwise class SSE is used."
        return if (prev_class.isX87() or next_class.isX87()) .memory else .sse;
    }
};
/// The position in which a type is being classified. The classifiers in this
/// file branch on it because some types are treated differently per position.
pub const Context = enum {
    /// Presumably "classified as a function return value" (inferred from the name).
    ret,
    /// Presumably "classified as a function argument" (inferred from the name).
    arg,
    /// Any other position. `classifySystemVStruct` and `classifySystemVUnion`
    /// pass this when classifying an individual field of an aggregate.
    other,
};
/// Classifies `ty` for the Windows x64 calling convention and returns a single
/// `Class`.
///
/// `ctx` only influences 128-bit floats: they are `.memory` as an argument and
/// `.sse` otherwise. Zero-sized integer-like types and compile-time-only type
/// tags are not expected here and hit `unreachable`.
pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) Class {
    // https://docs.microsoft.com/en-gb/cpp/build/x64-calling-convention?view=vs-2017
    // "There's a strict one-to-one correspondence between a function call's arguments
    // and the registers used for those arguments. Any argument that doesn't fit in 8
    // bytes, or isn't 1, 2, 4, or 8 bytes, must be passed by reference. A single argument
    // is never spread across multiple registers."
    // "All floating point operations are done using the 16 XMM registers."
    // "Structs and unions of size 8, 16, 32, or 64 bits, and __m64 types, are passed
    // as if they were integers of the same size."
    const tag = ty.zigTypeTag(zcu);

    // Dispose of everything that is not sized like an integer first.
    switch (tag) {
        .type,
        .comptime_float,
        .comptime_int,
        .undefined,
        .null,
        .@"fn",
        .@"opaque",
        .enum_literal,
        => unreachable,

        .vector => return .sse,

        .float => return switch (ty.floatBits(target)) {
            16, 32, 64 => .sse,
            80 => .memory,
            128 => if (ctx == .arg) .memory else .sse,
            else => unreachable,
        },

        .pointer,
        .int,
        .bool,
        .@"enum",
        .void,
        .noreturn,
        .error_set,
        .@"struct",
        .@"union",
        .optional,
        .array,
        .error_union,
        .@"anyframe",
        .frame,
        => {},
    }

    // Sizes of exactly 1, 2, 4 or 8 bytes travel in a general purpose register.
    switch (ty.abiSize(zcu)) {
        0 => unreachable,
        1, 2, 4, 8 => return .integer,
        else => {},
    }

    // Any other size: integers and packed containers get the dedicated
    // `.win_i128` class, everything else goes through memory.
    const uses_win_i128 = switch (tag) {
        .int => true,
        .@"struct", .@"union" => ty.containerLayout(zcu) == .@"packed",
        else => false,
    };
    return if (uses_win_i128) .win_i128 else .memory;
}
/// Classifies `ty` for the System V x86-64 calling convention.
///
/// There are a maximum of 8 possible return slots. Returned values are in
/// the beginning of the array; unused slots are filled with .none.
///
/// For aggregates a slot corresponds to one eightbyte (fields are merged into
/// slot `byte_offset / 8`). A result equal to `Class.stack` means the value is
/// classified as memory.
///
/// `ctx` affects the result for:
/// * `f16`, which is `Class.stack` when `ctx == .other`;
/// * bool vectors wider than 64 bits, which are `Class.stack` when `ctx == .other`;
/// * other vectors wider than 128 bits, where `.arg` requires AVX (and AVX512F
///   above 256 bits) and `.ret` admits sizes beyond 512 bits.
///
/// Type tags not listed in the switch hit `unreachable`.
pub fn classifySystemV(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) [8]Class {
switch (ty.zigTypeTag(zcu)) {
// A slice takes two integer slots; every other pointer takes one.
.pointer => switch (ty.ptrSize(zcu)) {
.slice => return Class.two_integers,
else => return Class.one_integer,
},
// One integer slot per started 64 bits, up to four slots; wider integers go to memory.
.int, .@"enum", .error_set => {
const bits = ty.intInfo(zcu).bits;
if (bits <= 64 * 1) return Class.one_integer;
if (bits <= 64 * 2) return Class.two_integers;
if (bits <= 64 * 3) return Class.three_integers;
if (bits <= 64 * 4) return Class.four_integers;
return Class.stack;
},
.bool, .void, .noreturn => return Class.one_integer,
.float => switch (ty.floatBits(target)) {
16 => {
if (ctx == .other) return Class.stack;
// TODO clang doesn't allow __fp16 as .ret or .arg
return Class.f16;
},
32 => return Class.f32,
64 => return Class.f64,
// "Arguments of types __float128, _Decimal128 and __m128 are
// split into two halves. The least significant ones belong
// to class SSE, the most significant one to class SSEUP."
128 => return Class.f128,
// "The 64-bit mantissa of arguments of type long double
// belongs to class X87, the 16-bit exponent plus 6 bytes
// of padding belongs to class X87UP."
80 => return Class.f80,
else => unreachable,
},
.vector => {
const elem_ty = ty.childType(zcu);
// Total payload size in bits: element bit size times element count.
const bits = elem_ty.bitSize(zcu) * ty.arrayLen(zcu);
// Bool vectors: small ones use a single integer or SSE slot; larger ones
// are marked `.integer_per_element` if the enabled CPU features cover the width.
if (elem_ty.toIntern() == .bool_type) {
if (bits <= 32) return Class.one_integer;
if (bits <= 64) return Class.f64;
if (ctx == .other) return Class.stack;
if (bits <= 128) return Class.len_integers;
if (bits <= 256 and target.cpu.has(.x86, .avx)) return Class.len_integers;
if (bits <= 512 and target.cpu.has(.x86, .avx512f)) return Class.len_integers;
return Class.stack;
}
// Vectors of 80-bit floats: one or two elements use the x87 classes, more go to memory.
if (elem_ty.isRuntimeFloat() and elem_ty.floatBits(target) == 80) {
if (bits <= 80 * 1) return Class.f80;
if (bits <= 80 * 2) return Class.complex_x87;
return Class.stack;
}
// Remaining vectors: `.sse` followed by one `.sseup` per additional started 64 bits.
// The order of the checks below matters: each feature gate applies only to the
// sizes tested after it.
if (bits <= 64 * 1) return .{
.sse, .none, .none, .none,
.none, .none, .none, .none,
};
if (bits <= 64 * 2) return .{
.sse, .sseup, .none, .none,
.none, .none, .none, .none,
};
// Arguments wider than 128 bits need AVX, otherwise they are passed in memory.
if (ctx == .arg and !target.cpu.has(.x86, .avx)) return Class.stack;
if (bits <= 64 * 3) return .{
.sse, .sseup, .sseup, .none,
.none, .none, .none, .none,
};
if (bits <= 64 * 4) return .{
.sse, .sseup, .sseup, .sseup,
.none, .none, .none, .none,
};
// Arguments wider than 256 bits need AVX512F, otherwise they are passed in memory.
if (ctx == .arg and !target.cpu.has(.x86, .avx512f)) return Class.stack;
if (bits <= 64 * 5) return .{
.sse, .sseup, .sseup, .sseup,
.sseup, .none, .none, .none,
};
if (bits <= 64 * 6) return .{
.sse, .sseup, .sseup, .sseup,
.sseup, .sseup, .none, .none,
};
if (bits <= 64 * 7) return .{
.sse, .sseup, .sseup, .sseup,
.sseup, .sseup, .sseup, .none,
};
// Up to 512 bits always fits the eight slots. For return values the limit is
// raised to 64 * 32 bits with AVX512F or 64 * 16 bits with AVX; such values
// still report all eight slots as used.
if (bits <= 64 * 8 or (ctx == .ret and bits <= @as(u64, if (target.cpu.has(.x86, .avx512f))
64 * 32
else if (target.cpu.has(.x86, .avx))
64 * 16
else
64 * 8))) return .{
.sse, .sseup, .sseup, .sseup,
.sseup, .sseup, .sseup, .sseup,
};
return Class.stack;
},
.optional => {
// An optional represented by its payload is classified exactly like the payload;
// any other optional goes to memory.
if (ty.optionalReprIsPayload(zcu)) {
return classifySystemV(ty.optionalChild(zcu), zcu, target, ctx);
}
return Class.stack;
},
.@"struct", .@"union" => {
// "If the size of an object is larger than eight eightbytes, or
// it contains unaligned fields, it has class MEMORY"
// "If the size of the aggregate exceeds a single eightbyte, each is classified
// separately.".
const ty_size = ty.abiSize(zcu);
switch (ty.containerLayout(zcu)) {
.auto => unreachable,
.@"extern" => {},
// Packed containers are classified like an integer of the same size.
.@"packed" => {
if (ty_size <= 8) return Class.one_integer;
if (ty_size <= 16) return Class.two_integers;
unreachable; // frontend should not have allowed this type as extern
},
}
if (ty_size > 64) return Class.stack;
// Merge the classes of all fields into `result`. The returned end offset is not needed here.
var result: [8]Class = @splat(.none);
_ = if (zcu.typeToStruct(ty)) |loaded_struct|
classifySystemVStruct(&result, 0, loaded_struct, zcu, target)
else if (zcu.typeToUnion(ty)) |loaded_union|
classifySystemVUnion(&result, 0, loaded_union, zcu, target)
else
unreachable;
// Post-merger cleanup
// "If one of the classes is MEMORY, the whole argument is passed in memory"
// "If X87UP is not preceded by X87, the whole argument is passed in memory."
for (result, 0..) |class, i| switch (class) {
.memory => return Class.stack,
.x87up => if (i == 0 or result[i - 1] != .x87) return Class.stack,
else => continue,
};
// "If the size of the aggregate exceeds two eightbytes and the first eight-
// byte isn’t SSE or any other eightbyte isn’t SSEUP, the whole argument
// is passed in memory."
if (ty_size > 16 and (result[0] != .sse or
std.mem.indexOfNone(Class, result[1..], &.{ .sseup, .none }) != null)) return Class.stack;
// "If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE."
// NOTE(review): `result[i - 1]` has no `i == 0` guard, so this relies on slot 0
// never being `.sseup`. No classification visible in this function starts with
// `.sseup` — confirm this invariant holds for the merge as well.
for (&result, 0..) |*item, i| {
if (item.* == .sseup) switch (result[i - 1]) {
.sse, .sseup => continue,
else => item.* = .sse,
};
}
return result;
},
// Arrays are classified by size only: up to two integer slots, otherwise memory.
.array => {
const ty_size = ty.abiSize(zcu);
if (ty_size <= 8) return Class.one_integer;
if (ty_size <= 16) return Class.two_integers;
return Class.stack;
},
else => unreachable,
}
}
/// Merges the classes of every field of `loaded_struct` (visited in runtime
/// order) into `result`, treating the struct as starting at
/// `starting_byte_offset`. A field's classes land in the slots beginning at
/// `field offset / 8`.
///
/// Nested `extern` structs and unions are walked recursively at their own
/// offset. Packed containers and all other field types are classified as a
/// whole with `Context.other`.
///
/// Returns the byte offset just past the struct, i.e. `starting_byte_offset`
/// plus the struct's size.
fn classifySystemVStruct(
    result: *[8]Class,
    starting_byte_offset: u64,
    loaded_struct: InternPool.LoadedStructType,
    zcu: *Zcu,
    target: *const std.Target,
) u64 {
    const ip = &zcu.intern_pool;
    var cursor = starting_byte_offset;
    var fields = loaded_struct.iterateRuntimeOrder(ip);
    while (fields.next()) |field_index| {
        const field_ty = Type.fromInterned(loaded_struct.field_types.get(ip)[field_index]);

        // Move the cursor to where this field starts. An explicit field
        // alignment takes precedence over the type's ABI alignment.
        const explicit_align = loaded_struct.fieldAlign(ip, field_index).toByteUnits();
        const align_bytes = explicit_align orelse field_ty.abiAlignment(zcu).toByteUnits().?;
        cursor = std.mem.alignForward(u64, cursor, align_bytes);

        // Extern aggregates are flattened in place; the recursive call hands
        // back the offset at which the next field search continues.
        if (zcu.typeToStruct(field_ty)) |nested_struct| {
            switch (nested_struct.layout) {
                .@"packed" => {},
                .@"extern" => {
                    cursor = classifySystemVStruct(result, cursor, nested_struct, zcu, target);
                    continue;
                },
                .auto => unreachable,
            }
        } else if (zcu.typeToUnion(field_ty)) |nested_union| {
            switch (nested_union.flagsUnordered(ip).layout) {
                .@"packed" => {},
                .@"extern" => {
                    cursor = classifySystemVUnion(result, cursor, nested_union, zcu, target);
                    continue;
                },
                .auto => unreachable,
            }
        }

        // Leaf field: classify it on its own and fold each used slot into the
        // slots it overlaps.
        const field_result = classifySystemV(field_ty, zcu, target, .other);
        const used_classes = std.mem.sliceTo(&field_result, .none);
        const slots = result[@intCast(cursor / 8)..][0..used_classes.len];
        for (slots, used_classes) |*slot, field_class| {
            slot.* = slot.combineSystemV(field_class);
        }
        cursor += field_ty.abiSize(zcu);
    }

    // Sanity check: the end of the last field, rounded up to the struct's
    // alignment, must agree with the size recorded for the struct.
    const end_offset = starting_byte_offset + loaded_struct.sizeUnordered(ip);
    const struct_align = loaded_struct.flagsUnordered(ip).alignment.toByteUnits().?;
    std.debug.assert(end_offset == std.mem.alignForward(u64, cursor, struct_align));
    return end_offset;
}
/// Merges the classes of every field of `loaded_union` into `result`. All
/// fields are placed at `starting_byte_offset`, so their classes are combined
/// in the same slots, beginning at `starting_byte_offset / 8`.
///
/// Nested `extern` structs and unions are walked recursively. Packed
/// containers and all other field types are classified as a whole with
/// `Context.other`.
///
/// Returns the byte offset just past the union, i.e. `starting_byte_offset`
/// plus the union's size.
fn classifySystemVUnion(
    result: *[8]Class,
    starting_byte_offset: u64,
    loaded_union: InternPool.LoadedUnionType,
    zcu: *Zcu,
    target: *const std.Target,
) u64 {
    const ip = &zcu.intern_pool;
    var field_index: usize = 0;
    while (field_index < loaded_union.field_types.len) : (field_index += 1) {
        const field_ty = Type.fromInterned(loaded_union.field_types.get(ip)[field_index]);

        // Extern aggregates are flattened in place. Their end offset is
        // irrelevant because every union field starts at the same offset.
        if (zcu.typeToStruct(field_ty)) |nested_struct| {
            switch (nested_struct.layout) {
                .@"packed" => {},
                .@"extern" => {
                    _ = classifySystemVStruct(result, starting_byte_offset, nested_struct, zcu, target);
                    continue;
                },
                .auto => unreachable,
            }
        } else if (zcu.typeToUnion(field_ty)) |nested_union| {
            switch (nested_union.flagsUnordered(ip).layout) {
                .@"packed" => {},
                .@"extern" => {
                    _ = classifySystemVUnion(result, starting_byte_offset, nested_union, zcu, target);
                    continue;
                },
                .auto => unreachable,
            }
        }

        // Leaf field: classify it on its own and fold each used slot into the
        // slots it overlaps.
        const field_result = classifySystemV(field_ty, zcu, target, .other);
        const used_classes = std.mem.sliceTo(&field_result, .none);
        const slots = result[@intCast(starting_byte_offset / 8)..][0..used_classes.len];
        for (slots, used_classes) |*slot, field_class| {
            slot.* = slot.combineSystemV(field_class);
        }
    }
    return starting_byte_offset + loaded_union.sizeUnordered(ip);
}
/// Register assignment used for the `.auto` calling convention (see the
/// `.auto` prongs of the `get*Regs` functions in this file). Every set is a
/// positional slice of `gp_regs`, `x87_regs` or `sse_avx_regs`, so the order of
/// those arrays is significant.
pub const zigcc = struct {
pub const stack_align: ?InternPool.Alignment = null;
pub const return_in_regs = true;
pub const params_in_regs = true;
// Number of leading entries of each register array that are caller-saved:
// all but the last five general purpose registers, all but the last x87
// register, and every SSE/AVX register.
const volatile_gpr = gp_regs.len - 5;
const volatile_x87 = x87_regs.len - 1;
const volatile_sse = sse_avx_regs.len;
/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them
/// for anything else but stack offset tracking therefore we exclude them from this set.
///
/// With the constants above, the x87 and SSE/AVX slices contribute no registers,
/// so this set consists of the last five entries of `gp_regs`.
pub const callee_preserved_regs = gp_regs[volatile_gpr..] ++ x87_regs[volatile_x87 .. x87_regs.len - 1] ++ sse_avx_regs[volatile_sse..];
/// These registers need to be preserved (saved on the stack) and restored by the caller before
/// the caller relinquishes control to a subroutine via call instruction (or similar).
/// In other words, these registers are free to use by the callee.
pub const caller_preserved_regs = gp_regs[0..volatile_gpr] ++ x87_regs[0..volatile_x87] ++ sse_avx_regs[0..volatile_sse];
// Integer parameters use every caller-saved general purpose register except the
// last one; `getCAbiLinkerScratchReg` returns that last register for `.auto`.
const int_param_regs = gp_regs[0 .. volatile_gpr - 1];
const x87_param_regs = x87_regs[0..volatile_x87];
// Only the first half of the SSE/AVX registers carries parameters.
const sse_param_regs = sse_avx_regs[0 .. volatile_sse / 2];
const int_return_regs = gp_regs[0..volatile_gpr];
const x87_return_regs = x87_regs[0..volatile_x87];
// NOTE(review): this slice is bounded by `volatile_gpr` (a general purpose
// register count), not by `volatile_sse` — confirm that this is intentional.
const sse_return_regs = sse_avx_regs[0..volatile_gpr];
};
/// Register sets used for the `.x86_64_sysv` calling convention (see the
/// `.x86_64_sysv` prongs of the `get*Regs` functions in this file).
pub const SysV = struct {
/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them
/// for anything else but stack offset tracking therefore we exclude them from this set.
pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 };
/// These registers need to be preserved (saved on the stack) and restored by the caller before
/// the caller relinquishes control to a subroutine via call instruction (or similar).
/// In other words, these registers are free to use by the callee.
pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 } ++ x87_regs ++ sse_avx_regs;
// Integer parameter registers, in the order listed.
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
// Empty slice: no x87 register is used for parameters.
pub const c_abi_x87_param_regs = x87_regs[0..0];
// The first eight entries of `sse_avx_regs`.
pub const c_abi_sse_param_regs = sse_avx_regs[0..8];
pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
// The first two entries of `x87_regs`.
pub const c_abi_x87_return_regs = x87_regs[0..2];
// The first four entries of `sse_avx_regs`.
pub const c_abi_sse_return_regs = sse_avx_regs[0..4];
};
/// Register sets used for the `.x86_64_win` calling convention (see the
/// `.x86_64_win` prongs of the `get*Regs` functions in this file).
pub const Win64 = struct {
/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them
/// for anything else but stack offset tracking therefore we exclude them from this set.
pub const callee_preserved_regs = [_]Register{ .rbx, .rsi, .rdi, .r12, .r13, .r14, .r15 };
/// These registers need to be preserved (saved on the stack) and restored by the caller before
/// the caller relinquishes control to a subroutine via call instruction (or similar).
/// In other words, these registers are free to use by the callee.
///
/// NOTE(review): every SSE/AVX register is listed here as caller-saved and none
/// appears in `callee_preserved_regs` — confirm this against the Win64 ABI's
/// treatment of the upper XMM registers.
pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .r8, .r9, .r10, .r11 } ++ x87_regs ++ sse_avx_regs;
// Integer parameter registers, in the order listed.
pub const c_abi_int_param_regs = [_]Register{ .rcx, .rdx, .r8, .r9 };
// Empty slice: no x87 register is used for parameters.
pub const c_abi_x87_param_regs = x87_regs[0..0];
// The first four entries of `sse_avx_regs`.
pub const c_abi_sse_param_regs = sse_avx_regs[0..4];
pub const c_abi_int_return_regs = [_]Register{.rax};
// Empty slice: no x87 register is used for return values.
pub const c_abi_x87_return_regs = x87_regs[0..0];
// Only the first entry of `sse_avx_regs`.
pub const c_abi_sse_return_regs = sse_avx_regs[0..1];
};
/// Returns the callee-preserved register set for calling convention `cc`.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported; any other
/// tag is `unreachable`.
pub fn getCalleePreservedRegs(cc: std.builtin.CallingConvention.Tag) []const Register {
    switch (cc) {
        .auto => return zigcc.callee_preserved_regs,
        .x86_64_sysv => return &SysV.callee_preserved_regs,
        .x86_64_win => return &Win64.callee_preserved_regs,
        else => unreachable,
    }
}
/// Returns the caller-preserved register set for calling convention `cc`.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported; any other
/// tag is `unreachable`.
pub fn getCallerPreservedRegs(cc: std.builtin.CallingConvention.Tag) []const Register {
    switch (cc) {
        .auto => return zigcc.caller_preserved_regs,
        .x86_64_sysv => return &SysV.caller_preserved_regs,
        .x86_64_win => return &Win64.caller_preserved_regs,
        else => unreachable,
    }
}
/// Returns the integer parameter registers for calling convention `cc`.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported; any other
/// tag is `unreachable`.
pub fn getCAbiIntParamRegs(cc: std.builtin.CallingConvention.Tag) []const Register {
    switch (cc) {
        .auto => return zigcc.int_param_regs,
        .x86_64_sysv => return &SysV.c_abi_int_param_regs,
        .x86_64_win => return &Win64.c_abi_int_param_regs,
        else => unreachable,
    }
}
/// Returns the x87 parameter registers for calling convention `cc`. For
/// `.x86_64_sysv` and `.x86_64_win` this is an empty slice.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported; any other
/// tag is `unreachable`.
pub fn getCAbiX87ParamRegs(cc: std.builtin.CallingConvention.Tag) []const Register {
    switch (cc) {
        .auto => return zigcc.x87_param_regs,
        .x86_64_sysv => return SysV.c_abi_x87_param_regs,
        .x86_64_win => return Win64.c_abi_x87_param_regs,
        else => unreachable,
    }
}
/// Returns the SSE parameter registers for calling convention `cc`.
///
/// For `.auto` the answer depends on the CPU architecture of `target`:
/// `.x86_64` gets all of `zigcc.sse_param_regs`, `.x86` only the first half of
/// it, and any other architecture is `unreachable`. Calling conventions other
/// than `.auto`, `.x86_64_sysv` and `.x86_64_win` are `unreachable` as well.
pub fn getCAbiSseParamRegs(cc: std.builtin.CallingConvention.Tag, target: *const std.Target) []const Register {
    switch (cc) {
        .auto => switch (target.cpu.arch) {
            .x86_64 => return zigcc.sse_param_regs,
            .x86 => return zigcc.sse_param_regs[0 .. zigcc.sse_param_regs.len / 2],
            else => unreachable,
        },
        .x86_64_sysv => return SysV.c_abi_sse_param_regs,
        .x86_64_win => return Win64.c_abi_sse_param_regs,
        else => unreachable,
    }
}
/// Returns the integer return registers for calling convention `cc`.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported; any other
/// tag is `unreachable`.
pub fn getCAbiIntReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Register {
    switch (cc) {
        .auto => return zigcc.int_return_regs,
        .x86_64_sysv => return &SysV.c_abi_int_return_regs,
        .x86_64_win => return &Win64.c_abi_int_return_regs,
        else => unreachable,
    }
}
/// Returns the x87 return registers for calling convention `cc`. For
/// `.x86_64_win` this is an empty slice.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported; any other
/// tag is `unreachable`.
pub fn getCAbiX87ReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Register {
    switch (cc) {
        .auto => return zigcc.x87_return_regs,
        .x86_64_sysv => return SysV.c_abi_x87_return_regs,
        .x86_64_win => return Win64.c_abi_x87_return_regs,
        else => unreachable,
    }
}
/// Returns the SSE return registers for calling convention `cc`.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported; any other
/// tag is `unreachable`.
pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Register {
    switch (cc) {
        .auto => return zigcc.sse_return_regs,
        .x86_64_sysv => return SysV.c_abi_sse_return_regs,
        .x86_64_win => return Win64.c_abi_sse_return_regs,
        else => unreachable,
    }
}
/// Returns the register designated as linker scratch register for calling
/// convention `cc`: the last integer return register for `.auto`, and the
/// first integer return register for `.x86_64_sysv` and `.x86_64_win`.
/// Any other calling convention is `unreachable`.
pub fn getCAbiLinkerScratchReg(cc: std.builtin.CallingConvention.Tag) Register {
    switch (cc) {
        .auto => {
            const last = zigcc.int_return_regs.len - 1;
            return zigcc.int_return_regs[last];
        },
        .x86_64_sysv => return SysV.c_abi_int_return_regs[0],
        .x86_64_win => return Win64.c_abi_int_return_regs[0],
        else => unreachable,
    }
}
// General purpose registers known to this file. The order is significant:
// `zigcc` slices this array by position (its last five entries form the `zigcc`
// callee-preserved set). `.rsp` and `.rbp` are not listed.
const gp_regs = [_]Register{
.rax, .rdx, .rbx, .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15,
};
// The eight x87 stack registers.
const x87_regs = [_]Register{
.st0, .st1, .st2, .st3, .st4, .st5, .st6, .st7,
};
// The sixteen vector registers, named by their `ymm` form.
const sse_avx_regs = [_]Register{
.ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7,
.ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15,
};
// Registers handed to the register manager: every general purpose and vector
// register listed above, and every x87 register except the last one (`.st7`).
const allocatable_regs = gp_regs ++ x87_regs[0 .. x87_regs.len - 1] ++ sse_avx_regs;
// Register manager instantiated for this backend's `CodeGen.zig` over `allocatable_regs`.
pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, allocatable_regs);
// Register classes
const RegisterBitSet = RegisterManager.RegisterBitSet;
/// Bit sets over `allocatable_regs`, one per register class. Bit `i` of a set
/// is set exactly when `allocatable_regs[i].isClass(...)` reports membership
/// in the corresponding class.
pub const RegisterClass = struct {
    /// Registers for which `isClass(.general_purpose)` holds.
    pub const gp: RegisterBitSet = init: {
        var bits = RegisterBitSet.initEmpty();
        for (allocatable_regs, 0..) |candidate, i| {
            if (candidate.isClass(.general_purpose)) bits.set(i);
        }
        break :init bits;
    };
    /// Registers for which `isClass(.gphi)` holds.
    pub const gphi: RegisterBitSet = init: {
        var bits = RegisterBitSet.initEmpty();
        for (allocatable_regs, 0..) |candidate, i| {
            if (candidate.isClass(.gphi)) bits.set(i);
        }
        break :init bits;
    };
    /// Registers for which `isClass(.x87)` holds.
    pub const x87: RegisterBitSet = init: {
        var bits = RegisterBitSet.initEmpty();
        for (allocatable_regs, 0..) |candidate, i| {
            if (candidate.isClass(.x87)) bits.set(i);
        }
        break :init bits;
    };
    /// Registers for which `isClass(.sse)` holds.
    pub const sse: RegisterBitSet = init: {
        var bits = RegisterBitSet.initEmpty();
        for (allocatable_regs, 0..) |candidate, i| {
            if (candidate.isClass(.sse)) bits.set(i);
        }
        break :init bits;
    };
};
const builtin = @import("builtin");
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
const InternPool = @import("../../InternPool.zig");
const Register = @import("bits.zig").Register;
const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager;
const Type = @import("../../Type.zig");
const Value = @import("../../Value.zig");
const Zcu = @import("../../Zcu.zig");