mirror of
https://github.com/ziglang/zig.git
synced 2026-01-20 22:35:24 +00:00
Merge pull request #11828 from devins2518/arm-atomics
compiler_rt: aarch64 outline atomics
This commit is contained in:
parent
a3232c6764
commit
2bbaf95ebe
@ -1,8 +1,15 @@
|
||||
const builtin = @import("builtin");
|
||||
|
||||
pub const panic = @import("compiler_rt/common.zig").panic;
|
||||
|
||||
comptime {
|
||||
_ = @import("compiler_rt/atomics.zig");
|
||||
|
||||
// macOS has these functions inside libSystem.
|
||||
if (builtin.cpu.arch.isAARCH64() and !builtin.os.tag.isDarwin()) {
|
||||
_ = @import("compiler_rt/aarch64_outline_atomics.zig");
|
||||
}
|
||||
|
||||
_ = @import("compiler_rt/addf3.zig");
|
||||
_ = @import("compiler_rt/addhf3.zig");
|
||||
_ = @import("compiler_rt/addsf3.zig");
|
||||
|
||||
2227
lib/compiler_rt/aarch64_outline_atomics.zig
Normal file
2227
lib/compiler_rt/aarch64_outline_atomics.zig
Normal file
File diff suppressed because it is too large
Load Diff
338
tools/gen_outline_atomics.zig
Normal file
338
tools/gen_outline_atomics.zig
Normal file
@ -0,0 +1,338 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
/// Operation families for which a helper function is generated.
/// `main` splices `@tagName` of each tag into the emitted symbol name,
/// so the tag spelling is part of the generated output.
const AtomicOp = enum { cas, swp, ldadd, ldclr, ldeor, ldset };
|
||||
|
||||
/// Entry point: writes the whole generated source file to standard output.
///
/// The output consists of a fixed header, then one function per
/// (size, ordering, operation) combination, then a `comptime` block that
/// `@export`s every emitted function. For the 16-byte size only `cas` is
/// emitted.
pub fn main() !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var buffered = std.io.bufferedWriter(std.io.getStdOut().writer());
    const out = buffered.writer();

    const header =
        \\//! This file is generated by tools/gen_outline_atomics.zig.
        \\const builtin = @import("builtin");
        \\const std = @import("std");
        \\const linkage = @import("./common.zig").linkage;
        \\const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
        \\
        \\/// This default is overridden at runtime after inspecting CPU properties.
        \\/// It is intentionally not exported in order to make the machine code that
        \\/// uses it a statically predicted direct branch rather than using the PLT,
        \\/// which ARM is concerned would have too much overhead.
        \\var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
        \\
        \\
    ;
    try out.writeAll(header);

    // The export statements are accumulated while the functions are written
    // and flushed afterwards, so they all land in one trailing comptime block.
    var exports = std.ArrayList(u8).init(arena);
    try exports.appendSlice("\ncomptime {\n");
    const exports_writer = exports.writer();

    const sizes = [_]N{ .one, .two, .four, .eight, .sixteen };
    const orderings = [_]Ordering{ .relax, .acq, .rel, .acq_rel };
    const operations = [_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset };

    for (sizes) |n| {
        for (orderings) |order| {
            for (operations) |op| {
                // The 16-byte width is generated for compare-and-swap only.
                if (n != .sixteen or op == .cas) {
                    const symbol = try std.fmt.allocPrint(arena, "__aarch64_{s}{d}_{s}", .{
                        @tagName(op), n.toBytes(), @tagName(order),
                    });
                    try writeFunction(arena, out, symbol, op, n, order);
                    try exports_writer.print(" @export({s}, .{{ .name = \"{s}\", .linkage = linkage }});\n", .{
                        symbol, symbol,
                    });
                }
            }
        }
    }

    try out.writeAll(exports.items);
    try out.writeAll("}\n");
    try buffered.flush();
}
|
||||
|
||||
/// Writes one complete generated function named `name` to `w`.
///
/// The function text is: a signature line, a `@setRuntimeSafety(false)`
/// statement, and a single `asm volatile` expression whose template is the
/// assembly produced by the generator matching `op`. Each assembly line is
/// emitted as a Zig multi-line string line (prefixed with `\\`).
///
/// `arena` is used for the temporary assembly text; nothing is freed here.
fn writeFunction(
    arena: Allocator,
    w: anytype,
    name: []const u8,
    op: AtomicOp,
    n: N,
    order: Ordering,
) !void {
    const asm_text = switch (op) {
        .cas => try generateCas(arena, n, order),
        .swp => try generateSwp(arena, n, order),
        .ldadd => try generateLd(arena, n, order, .ldadd),
        .ldclr => try generateLd(arena, n, order, .ldclr),
        .ldeor => try generateLd(arena, n, order, .ldeor),
        .ldset => try generateLd(arena, n, order, .ldset),
    };

    // Signature, then the opening of the body up to the asm template.
    try w.print("fn {[name]s}() align(16) callconv(.Naked) void {{", .{ .name = name });
    try w.writeAll(
        \\
        \\ @setRuntimeSafety(false);
        \\ asm volatile (
        \\
    );

    // Re-emit every assembly line as one line of a multi-line string literal.
    var asm_lines = std.mem.split(u8, asm_text, "\n");
    while (asm_lines.next()) |asm_line| {
        try w.print(" \\\\{s}\n", .{asm_line});
    }

    // Operand/clobber lists and the end of the function. This text contains
    // literal braces, so it is written verbatim rather than formatted.
    try w.writeAll(
        \\ :
        \\ : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
        \\ : "w15", "w16", "w17", "memory"
        \\ );
        \\ unreachable;
        \\}
        \\
    );
}
|
||||
|
||||
/// Operand size of a generated helper. The integer value of each tag is the
/// size in bytes.
const N = enum(u8) {
    one = 1,
    two = 2,
    four = 4,
    eight = 8,
    sixteen = 16,

    /// Size-dependent text fragments substituted into the assembly templates.
    const Defines = struct {
        /// Suffix appended to the exclusive load/store mnemonics ("b", "h" or empty).
        s: []const u8,
        /// Mnemonic used by the compare-and-swap template to copy register 0
        /// into register 16 ("uxtb", "uxth" or "mov").
        uxt: []const u8,
        /// Hex constant added to the `.inst` base word; varies with the size.
        b: []const u8,
    };

    /// Returns the template fragments for this size.
    fn defines(n: N) Defines {
        return switch (n) {
            .one => Defines{ .s = "b", .uxt = "uxtb", .b = "0x00000000" },
            .two => Defines{ .s = "h", .uxt = "uxth", .b = "0x40000000" },
            .four => Defines{ .s = "", .uxt = "mov", .b = "0x80000000" },
            .eight => Defines{ .s = "", .uxt = "mov", .b = "0xc0000000" },
            .sixteen => Defines{ .s = "", .uxt = "mov", .b = "0x00000000" },
        };
    }

    /// Returns the register-name prefix: "w" for sizes below 8 bytes,
    /// "x" for 8 bytes and above.
    fn register(n: N) []const u8 {
        return switch (n) {
            .one, .two, .four => "w",
            .eight, .sixteen => "x",
        };
    }

    /// Returns the size in bytes.
    fn toBytes(n: N) u8 {
        const byte_count: u8 = @enumToInt(n);
        return byte_count;
    }

    /// Returns the size in bits.
    fn toBits(n: N) u8 {
        return 8 * n.toBytes();
    }
};
|
||||
|
||||
/// Memory-ordering variant of a generated helper. `main` splices the tag
/// name into the emitted symbol name.
const Ordering = enum {
    relax,
    acq,
    rel,
    acq_rel,

    /// Ordering-dependent text fragments substituted into the assembly templates.
    const Defines = struct {
        /// Ordering suffix with a leading underscore, e.g. "_acq_rel".
        suff: []const u8,
        /// Inserted after "ld" in the exclusive-load mnemonic ("a" or empty).
        a: []const u8,
        /// Inserted after "st" in the exclusive-store mnemonic ("l" or empty).
        l: []const u8,
        /// Hex constant added to the `.inst` base word by the compare-and-swap templates.
        m: []const u8,
        /// Hex constant added to the `.inst` base word by the swap and load-op templates.
        n: []const u8,
    };

    /// Returns the template fragments for this ordering.
    fn defines(self: @This()) Defines {
        return switch (self) {
            .relax => Defines{
                .suff = "_relax",
                .a = "",
                .l = "",
                .m = "0x000000",
                .n = "0x000000",
            },
            .acq => Defines{
                .suff = "_acq",
                .a = "a",
                .l = "",
                .m = "0x400000",
                .n = "0x800000",
            },
            .rel => Defines{
                .suff = "_rel",
                .a = "",
                .l = "l",
                .m = "0x008000",
                .n = "0x400000",
            },
            .acq_rel => Defines{
                .suff = "_acq_rel",
                .a = "a",
                .l = "l",
                .m = "0x408000",
                .n = "0xc00000",
            },
        };
    }
};
|
||||
|
||||
/// The operations handled by `generateLd`.
const LdName = enum {
    ldadd,
    ldclr,
    ldeor,
    ldset,
};
|
||||
|
||||
/// Builds the assembly text of a compare-and-swap helper for size `n` and
/// ordering `order`. The returned slice is allocated from `arena`.
///
/// Both variants start with `cbz w16, 8f`: when w16 is non-zero a single
/// `.inst` word is executed and the helper returns; otherwise control falls
/// to label 8, an exclusive load / compare / exclusive store retry loop.
/// The 16-byte variant uses the pair forms (`ld*xp` / `st*xp`) on x0..x3
/// with the address in x4; the smaller variants address memory through x2.
fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const ord = order.defines();

    // 16-byte case first: it needs none of the size-dependent fragments.
    if (n == .sixteen) {
        return try std.fmt.allocPrint(arena,
            \\ cbz w16, 8f
            \\ .inst 0x48207c82 + {[m]s}
            \\ ret
            \\8:
            \\ mov x16, x0
            \\ mov x17, x1
            \\0:
            \\ ld{[a]s}xp x0, x1, [x4]
            \\ cmp x0, x16
            \\ ccmp x1, x17, #0, eq
            \\ bne 1f
            \\ st{[l]s}xp w15, x2, x3, [x4]
            \\ cbnz w15, 0b
            \\1:
            \\ ret
        , .{
            .m = ord.m,
            .a = ord.a,
            .l = ord.l,
        });
    }

    const width = n.defines();

    return try std.fmt.allocPrint(arena,
        \\ cbz w16, 8f
        \\ .inst 0x08a07c41 + {[b]s} + {[m]s}
        \\ ret
        \\8:
        \\ {[uxt]s} {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x2]
        \\ cmp {[reg]s}0, {[reg]s}16
        \\ bne 1f
        \\ st{[l]s}xr{[s]s} w17, {[reg]s}1, [x2]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    , .{
        .b = width.b,
        .m = ord.m,
        .uxt = width.uxt,
        .reg = n.register(),
        .a = ord.a,
        .s = width.s,
        .l = ord.l,
    });
}
|
||||
|
||||
/// Builds the assembly text of a swap helper for size `n` and ordering
/// `order`. The returned slice is allocated from `arena`.
///
/// The text starts with `cbz w16, 8f`: when w16 is non-zero a single `.inst`
/// word is executed and the helper returns; otherwise the code at label 8
/// copies register 0 to register 16 and loops on an exclusive load from
/// `[x1]` followed by an exclusive store of the saved value until the store
/// status in w17 is zero.
fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const width = n.defines();
    const ord = order.defines();

    const template =
        \\ cbz w16, 8f
        \\ .inst 0x38208020 + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ st{[l]s}xr{[s]s} w17, {[reg]s}16, [x1]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    ;

    return try std.fmt.allocPrint(arena, template, .{
        .reg = n.register(),
        .s = width.s,
        .b = width.b,
        .a = ord.a,
        .l = ord.l,
        .n = ord.n,
    });
}
|
||||
|
||||
/// Builds the assembly text of a load-and-modify helper (`ld`) for size `n`
/// and ordering `order`. The returned slice is allocated from `arena`.
///
/// The text starts with `cbz w16, 8f`: when w16 is non-zero a single `.inst`
/// word is executed and the helper returns; otherwise the code at label 8
/// copies register 0 to register 16 and loops on: exclusive load from `[x1]`
/// into register 0, combine it with register 16 into register 17, exclusive
/// store of register 17, retrying while the store status in w15 is non-zero.
fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
    // Per-operation fragments: the ALU mnemonic used in the fallback loop
    // and the constant added to the `.inst` base word.
    const Variant = struct {
        mnemonic: []const u8,
        inst_addend: []const u8,
    };
    const variant = switch (ld) {
        .ldadd => Variant{ .mnemonic = "add", .inst_addend = "0x0000" },
        .ldclr => Variant{ .mnemonic = "bic", .inst_addend = "0x1000" },
        .ldeor => Variant{ .mnemonic = "eor", .inst_addend = "0x2000" },
        .ldset => Variant{ .mnemonic = "orr", .inst_addend = "0x3000" },
    };

    const width = n.defines();
    const ord = order.defines();

    const template =
        \\ cbz w16, 8f
        \\ .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ {[op]s} {[reg]s}17, {[reg]s}0, {[reg]s}16
        \\ st{[l]s}xr{[s]s} w15, {[reg]s}17, [x1]
        \\ cbnz w15, 0b
        \\1:
        \\ ret
    ;

    return try std.fmt.allocPrint(arena, template, .{
        .op = variant.mnemonic,
        .op_n = variant.inst_addend,
        .reg = n.register(),
        .s = width.s,
        .b = width.b,
        .a = ord.a,
        .l = ord.l,
        .n = ord.n,
    });
}
|
||||
Loading…
x
Reference in New Issue
Block a user