simplify outline atomics

 * Rely on libSystem when targeting macOS.
 * Make tools/gen_outline_atomics.zig more idiomatic.
 * Remove the CPU detection / auxval checking from compiler_rt. This
   functionality belongs in a different component. Zig's compiler_rt
   must not rely on constructors. Instead it will export a symbol for
   setting the value, and start code can detect and activate it.
 * Remove the separate inline assembly logic for targets that do or do
   not have LSE support. `.inst` works in both cases.
This commit is contained in:
Andrew Kelley 2022-11-21 12:36:26 -07:00
parent a8f2d00ec4
commit 95ee8ab77d
3 changed files with 330 additions and 8721 deletions

View File

@ -1,8 +1,14 @@
const builtin = @import("builtin");
pub const panic = @import("compiler_rt/common.zig").panic; pub const panic = @import("compiler_rt/common.zig").panic;
comptime { comptime {
_ = @import("compiler_rt/atomics.zig"); _ = @import("compiler_rt/atomics.zig");
_ = @import("compiler_rt/lse_atomics.zig");
// macOS has these functions inside libSystem.
if (builtin.cpu.arch.isAARCH64() and !builtin.os.tag.isDarwin()) {
_ = @import("compiler_rt/lse_atomics.zig");
}
_ = @import("compiler_rt/addf3.zig"); _ = @import("compiler_rt/addf3.zig");
_ = @import("compiler_rt/addhf3.zig"); _ = @import("compiler_rt/addhf3.zig");

File diff suppressed because it is too large Load Diff

View File

@ -11,217 +11,97 @@ const AtomicOp = enum {
}; };
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer std.debug.assert(!gpa.deinit()); defer arena_instance.deinit();
var allocator = gpa.allocator(); const arena = arena_instance.allocator();
const args = try std.process.argsAlloc(allocator); //const args = try std.process.argsAlloc(arena);
defer std.process.argsFree(allocator, args);
if (args.len != 2) {
usageAndExit(std.io.getStdErr(), args[0], 1);
}
var file = try std.fs.cwd().createFile(args[1], .{ .truncate = true }); var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
const w = bw.writer();
try file.writeAll( try w.writeAll(
\\const std = @import("std"); \\//! This file is generated by tools/gen_outline_atomics.zig.
\\const builtin = @import("builtin"); \\const builtin = @import("builtin");
\\const arch = builtin.cpu.arch; \\const std = @import("std");
\\const is_test = builtin.is_test; \\const linkage = @import("./common.zig").linkage;
\\const target = std.Target; \\const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
\\const os_tag = builtin.os.tag; \\
\\const is_darwin = target.Os.Tag.isDarwin(os_tag); \\/// This default is overridden at runtime after inspecting CPU properties.
\\const has_lse = target.aarch64.featureSetHas(builtin.target.cpu.features, .lse); \\/// It is intentionally not exported in order to make the machine code that
\\const linkage = if (is_test) \\/// uses it a statically predicted direct branch rather than using the PLT,
\\ std.builtin.GlobalLinkage.Internal \\/// which ARM is concerned would have too much overhead.
\\else \\var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
\\ std.builtin.GlobalLinkage.Strong;
\\ \\
\\ \\
); );
var footer = std.ArrayList(u8).init(arena);
try footer.appendSlice("\ncomptime {\n");
for ([_]N{ .one, .two, .four, .eight, .sixteen }) |n| { for ([_]N{ .one, .two, .four, .eight, .sixteen }) |n| {
for ([_]Ordering{ .relax, .acq, .rel, .acq_rel }) |order| { for ([_]Ordering{ .relax, .acq, .rel, .acq_rel }) |order| {
for ([_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset }) |pat| { for ([_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset }) |op| {
if (pat == .cas or n != .sixteen) { if (n == .sixteen and op != .cas) continue;
for ([_]bool{ true, false }) |darwin| {
for ([_]bool{ true, false }) |lse| { const name = try std.fmt.allocPrint(arena, "__aarch64_{s}{d}_{s}", .{
const darwin_name = if (darwin) "Darwin" else "Nondarwin"; @tagName(op), n.toBytes(), @tagName(order),
const lse_name = if (lse) "Lse" else "Nolse"; });
var buf: [100:0]u8 = undefined; try writeFunction(arena, w, name, op, n, order);
const name = try std.fmt.bufPrintZ(&buf, "{s}{s}{s}{s}{s}", .{ @tagName(pat), n.toBytes(), order.capName(), darwin_name, lse_name }); try footer.writer().print(" @export({s}, .{{ .name = \"{s}\", .linkage = linkage }});\n", .{
const body = switch (pat) { name, name,
.cas => try generateCas(&allocator, n, order, lse), });
.swp => try generateSwp(&allocator, n, order, lse),
.ldadd => try generateLd(&allocator, n, order, .ldadd, lse),
.ldclr => try generateLd(&allocator, n, order, .ldclr, lse),
.ldeor => try generateLd(&allocator, n, order, .ldeor, lse),
.ldset => try generateLd(&allocator, n, order, .ldset, lse),
};
defer allocator.destroy(body.ptr);
try writeFunction(&file, name, pat, n, body);
}
}
try writeExport(&file, @tagName(pat), n.toBytes(), order);
}
} }
} }
} }
try file.writeAll( try w.writeAll(footer.items);
\\//TODO: Add linksection once implemented and remove init at writeFunction try w.writeAll("}\n");
\\fn __init_aarch64_have_lse_atomics() callconv(.C) void { try bw.flush();
\\ const AT_HWCAP = 16;
\\ const HWCAP_ATOMICS = 1 << 8;
\\ const hwcap = std.os.linux.getauxval(AT_HWCAP);
\\ __aarch64_have_lse_atomics = @boolToInt((hwcap & HWCAP_ATOMICS) != 0);
\\}
\\
\\var __aarch64_have_lse_atomics: u8 = @boolToInt(has_lse);
\\
\\comptime {
\\ if (arch.isAARCH64()) {
\\ @export(__aarch64_cas1_relax, .{ .name = "__aarch64_cas1_relax", .linkage = linkage });
\\ @export(__aarch64_cas1_acq, .{ .name = "__aarch64_cas1_acq", .linkage = linkage });
\\ @export(__aarch64_cas1_rel, .{ .name = "__aarch64_cas1_rel", .linkage = linkage });
\\ @export(__aarch64_cas1_acq_rel, .{ .name = "__aarch64_cas1_acq_rel", .linkage = linkage });
\\ @export(__aarch64_cas2_relax, .{ .name = "__aarch64_cas2_relax", .linkage = linkage });
\\ @export(__aarch64_cas2_acq, .{ .name = "__aarch64_cas2_acq", .linkage = linkage });
\\ @export(__aarch64_cas2_rel, .{ .name = "__aarch64_cas2_rel", .linkage = linkage });
\\ @export(__aarch64_cas2_acq_rel, .{ .name = "__aarch64_cas2_acq_rel", .linkage = linkage });
\\ @export(__aarch64_cas4_relax, .{ .name = "__aarch64_cas4_relax", .linkage = linkage });
\\ @export(__aarch64_cas4_acq, .{ .name = "__aarch64_cas4_acq", .linkage = linkage });
\\ @export(__aarch64_cas4_rel, .{ .name = "__aarch64_cas4_rel", .linkage = linkage });
\\ @export(__aarch64_cas4_acq_rel, .{ .name = "__aarch64_cas4_acq_rel", .linkage = linkage });
\\ @export(__aarch64_cas8_relax, .{ .name = "__aarch64_cas8_relax", .linkage = linkage });
\\ @export(__aarch64_cas8_acq, .{ .name = "__aarch64_cas8_acq", .linkage = linkage });
\\ @export(__aarch64_cas8_rel, .{ .name = "__aarch64_cas8_rel", .linkage = linkage });
\\ @export(__aarch64_cas8_acq_rel, .{ .name = "__aarch64_cas8_acq_rel", .linkage = linkage });
\\ @export(__aarch64_cas16_relax, .{ .name = "__aarch64_cas16_relax", .linkage = linkage });
\\ @export(__aarch64_cas16_acq, .{ .name = "__aarch64_cas16_acq", .linkage = linkage });
\\ @export(__aarch64_cas16_rel, .{ .name = "__aarch64_cas16_rel", .linkage = linkage });
\\ @export(__aarch64_cas16_acq_rel, .{ .name = "__aarch64_cas16_acq_rel", .linkage = linkage });
\\ @export(__aarch64_swp1_relax, .{ .name = "__aarch64_swp1_relax", .linkage = linkage });
\\ @export(__aarch64_swp1_acq, .{ .name = "__aarch64_swp1_acq", .linkage = linkage });
\\ @export(__aarch64_swp1_rel, .{ .name = "__aarch64_swp1_rel", .linkage = linkage });
\\ @export(__aarch64_swp1_acq_rel, .{ .name = "__aarch64_swp1_acq_rel", .linkage = linkage });
\\ @export(__aarch64_swp2_relax, .{ .name = "__aarch64_swp2_relax", .linkage = linkage });
\\ @export(__aarch64_swp2_acq, .{ .name = "__aarch64_swp2_acq", .linkage = linkage });
\\ @export(__aarch64_swp2_rel, .{ .name = "__aarch64_swp2_rel", .linkage = linkage });
\\ @export(__aarch64_swp2_acq_rel, .{ .name = "__aarch64_swp2_acq_rel", .linkage = linkage });
\\ @export(__aarch64_swp4_relax, .{ .name = "__aarch64_swp4_relax", .linkage = linkage });
\\ @export(__aarch64_swp4_acq, .{ .name = "__aarch64_swp4_acq", .linkage = linkage });
\\ @export(__aarch64_swp4_rel, .{ .name = "__aarch64_swp4_rel", .linkage = linkage });
\\ @export(__aarch64_swp4_acq_rel, .{ .name = "__aarch64_swp4_acq_rel", .linkage = linkage });
\\ @export(__aarch64_swp8_relax, .{ .name = "__aarch64_swp8_relax", .linkage = linkage });
\\ @export(__aarch64_swp8_acq, .{ .name = "__aarch64_swp8_acq", .linkage = linkage });
\\ @export(__aarch64_swp8_rel, .{ .name = "__aarch64_swp8_rel", .linkage = linkage });
\\ @export(__aarch64_swp8_acq_rel, .{ .name = "__aarch64_swp8_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd1_relax, .{ .name = "__aarch64_ldadd1_relax", .linkage = linkage });
\\ @export(__aarch64_ldadd1_acq, .{ .name = "__aarch64_ldadd1_acq", .linkage = linkage });
\\ @export(__aarch64_ldadd1_rel, .{ .name = "__aarch64_ldadd1_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd1_acq_rel, .{ .name = "__aarch64_ldadd1_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd2_relax, .{ .name = "__aarch64_ldadd2_relax", .linkage = linkage });
\\ @export(__aarch64_ldadd2_acq, .{ .name = "__aarch64_ldadd2_acq", .linkage = linkage });
\\ @export(__aarch64_ldadd2_rel, .{ .name = "__aarch64_ldadd2_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd2_acq_rel, .{ .name = "__aarch64_ldadd2_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd4_relax, .{ .name = "__aarch64_ldadd4_relax", .linkage = linkage });
\\ @export(__aarch64_ldadd4_acq, .{ .name = "__aarch64_ldadd4_acq", .linkage = linkage });
\\ @export(__aarch64_ldadd4_rel, .{ .name = "__aarch64_ldadd4_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd4_acq_rel, .{ .name = "__aarch64_ldadd4_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd8_relax, .{ .name = "__aarch64_ldadd8_relax", .linkage = linkage });
\\ @export(__aarch64_ldadd8_acq, .{ .name = "__aarch64_ldadd8_acq", .linkage = linkage });
\\ @export(__aarch64_ldadd8_rel, .{ .name = "__aarch64_ldadd8_rel", .linkage = linkage });
\\ @export(__aarch64_ldadd8_acq_rel, .{ .name = "__aarch64_ldadd8_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr1_relax, .{ .name = "__aarch64_ldclr1_relax", .linkage = linkage });
\\ @export(__aarch64_ldclr1_acq, .{ .name = "__aarch64_ldclr1_acq", .linkage = linkage });
\\ @export(__aarch64_ldclr1_rel, .{ .name = "__aarch64_ldclr1_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr1_acq_rel, .{ .name = "__aarch64_ldclr1_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr2_relax, .{ .name = "__aarch64_ldclr2_relax", .linkage = linkage });
\\ @export(__aarch64_ldclr2_acq, .{ .name = "__aarch64_ldclr2_acq", .linkage = linkage });
\\ @export(__aarch64_ldclr2_rel, .{ .name = "__aarch64_ldclr2_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr2_acq_rel, .{ .name = "__aarch64_ldclr2_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr4_relax, .{ .name = "__aarch64_ldclr4_relax", .linkage = linkage });
\\ @export(__aarch64_ldclr4_acq, .{ .name = "__aarch64_ldclr4_acq", .linkage = linkage });
\\ @export(__aarch64_ldclr4_rel, .{ .name = "__aarch64_ldclr4_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr4_acq_rel, .{ .name = "__aarch64_ldclr4_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr8_relax, .{ .name = "__aarch64_ldclr8_relax", .linkage = linkage });
\\ @export(__aarch64_ldclr8_acq, .{ .name = "__aarch64_ldclr8_acq", .linkage = linkage });
\\ @export(__aarch64_ldclr8_rel, .{ .name = "__aarch64_ldclr8_rel", .linkage = linkage });
\\ @export(__aarch64_ldclr8_acq_rel, .{ .name = "__aarch64_ldclr8_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor1_relax, .{ .name = "__aarch64_ldeor1_relax", .linkage = linkage });
\\ @export(__aarch64_ldeor1_acq, .{ .name = "__aarch64_ldeor1_acq", .linkage = linkage });
\\ @export(__aarch64_ldeor1_rel, .{ .name = "__aarch64_ldeor1_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor1_acq_rel, .{ .name = "__aarch64_ldeor1_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor2_relax, .{ .name = "__aarch64_ldeor2_relax", .linkage = linkage });
\\ @export(__aarch64_ldeor2_acq, .{ .name = "__aarch64_ldeor2_acq", .linkage = linkage });
\\ @export(__aarch64_ldeor2_rel, .{ .name = "__aarch64_ldeor2_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor2_acq_rel, .{ .name = "__aarch64_ldeor2_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor4_relax, .{ .name = "__aarch64_ldeor4_relax", .linkage = linkage });
\\ @export(__aarch64_ldeor4_acq, .{ .name = "__aarch64_ldeor4_acq", .linkage = linkage });
\\ @export(__aarch64_ldeor4_rel, .{ .name = "__aarch64_ldeor4_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor4_acq_rel, .{ .name = "__aarch64_ldeor4_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor8_relax, .{ .name = "__aarch64_ldeor8_relax", .linkage = linkage });
\\ @export(__aarch64_ldeor8_acq, .{ .name = "__aarch64_ldeor8_acq", .linkage = linkage });
\\ @export(__aarch64_ldeor8_rel, .{ .name = "__aarch64_ldeor8_rel", .linkage = linkage });
\\ @export(__aarch64_ldeor8_acq_rel, .{ .name = "__aarch64_ldeor8_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldset1_relax, .{ .name = "__aarch64_ldset1_relax", .linkage = linkage });
\\ @export(__aarch64_ldset1_acq, .{ .name = "__aarch64_ldset1_acq", .linkage = linkage });
\\ @export(__aarch64_ldset1_rel, .{ .name = "__aarch64_ldset1_rel", .linkage = linkage });
\\ @export(__aarch64_ldset1_acq_rel, .{ .name = "__aarch64_ldset1_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldset2_relax, .{ .name = "__aarch64_ldset2_relax", .linkage = linkage });
\\ @export(__aarch64_ldset2_acq, .{ .name = "__aarch64_ldset2_acq", .linkage = linkage });
\\ @export(__aarch64_ldset2_rel, .{ .name = "__aarch64_ldset2_rel", .linkage = linkage });
\\ @export(__aarch64_ldset2_acq_rel, .{ .name = "__aarch64_ldset2_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldset4_relax, .{ .name = "__aarch64_ldset4_relax", .linkage = linkage });
\\ @export(__aarch64_ldset4_acq, .{ .name = "__aarch64_ldset4_acq", .linkage = linkage });
\\ @export(__aarch64_ldset4_rel, .{ .name = "__aarch64_ldset4_rel", .linkage = linkage });
\\ @export(__aarch64_ldset4_acq_rel, .{ .name = "__aarch64_ldset4_acq_rel", .linkage = linkage });
\\ @export(__aarch64_ldset8_relax, .{ .name = "__aarch64_ldset8_relax", .linkage = linkage });
\\ @export(__aarch64_ldset8_acq, .{ .name = "__aarch64_ldset8_acq", .linkage = linkage });
\\ @export(__aarch64_ldset8_rel, .{ .name = "__aarch64_ldset8_rel", .linkage = linkage });
\\ @export(__aarch64_ldset8_acq_rel, .{ .name = "__aarch64_ldset8_acq_rel", .linkage = linkage });
\\ }
\\}
\\
);
} }
fn usageAndExit(file: std.fs.File, arg0: []const u8, code: u8) noreturn { fn writeFunction(
file.writer().print( arena: Allocator,
\\Usage: {s} /path/to/lib/compiler_rt/lse_atomics.zig w: anytype,
\\ name: []const u8,
\\Generates outline atomics for compiler-rt. op: AtomicOp,
\\ n: N,
, .{arg0}) catch std.process.exit(1); order: Ordering,
std.process.exit(code); ) !void {
} const body = switch (op) {
.cas => try generateCas(arena, n, order),
fn writeFunction(file: *std.fs.File, name: [:0]const u8, op: AtomicOp, n: N, body: [:0]const u8) !void { .swp => try generateSwp(arena, n, order),
var fn_buf: [100:0]u8 = undefined; .ldadd => try generateLd(arena, n, order, .ldadd),
.ldclr => try generateLd(arena, n, order, .ldclr),
.ldeor => try generateLd(arena, n, order, .ldeor),
.ldset => try generateLd(arena, n, order, .ldset),
};
const fn_sig = if (op != .cas) const fn_sig = if (op != .cas)
try std.fmt.bufPrintZ(&fn_buf, "fn {[name]s}(val: u{[n]s}, ptr: *u{[n]s}) callconv(.C) u{[n]s} {{", .{ .name = name, .n = n.toBits() }) try std.fmt.allocPrint(arena, "fn {[name]s}(val: u{[n]d}, ptr: *u{[n]d}) callconv(.C) u{[n]d} {{", .{
.name = name,
.n = n.toBits(),
})
else else
try std.fmt.bufPrintZ(&fn_buf, "fn {[name]s}(expected: u{[n]s}, desired: u{[n]s}, ptr: *u{[n]s}) callconv(.C) u{[n]s} {{", .{ .name = name, .n = n.toBits() }); try std.fmt.allocPrint(arena, "fn {[name]s}(expected: u{[n]d}, desired: u{[n]d}, ptr: *u{[n]d}) callconv(.C) u{[n]d} {{", .{
try file.writeAll(fn_sig); .name = name,
try file.writeAll( .n = n.toBits(),
});
try w.writeAll(fn_sig);
try w.writeAll(
\\ \\
\\ @setRuntimeSafety(false); \\ @setRuntimeSafety(false);
\\ __init_aarch64_have_lse_atomics();
\\
\\ return asm volatile ( \\ return asm volatile (
\\ \\
); );
var iter = std.mem.split(u8, body, "\n"); var iter = std.mem.split(u8, body, "\n");
while (iter.next()) |line| { while (iter.next()) |line| {
try file.writeAll(" \\\\"); try w.writeAll(" \\\\");
try file.writeAll(line); try w.writeAll(line);
try file.writeAll("\n"); try w.writeAll("\n");
} }
var constraint_buf: [500:0]u8 = undefined;
const constraints = if (op != .cas) const constraints = if (op != .cas)
try std.fmt.bufPrintZ(&constraint_buf, try std.fmt.allocPrint(arena,
\\ : [ret] "={{{[reg]s}0}}" (-> u{[ty]s}), \\ : [ret] "={{{[reg]s}0}}" (-> u{[ty]d}),
\\ : [val] "{{{[reg]s}0}}" (val), \\ : [val] "{{{[reg]s}0}}" (val),
\\ [ptr] "{{x1}}" (ptr), \\ [ptr] "{{x1}}" (ptr),
\\ [__aarch64_have_lse_atomics] "{{w16}}" (__aarch64_have_lse_atomics), \\ [__aarch64_have_lse_atomics] "{{w16}}" (__aarch64_have_lse_atomics),
@ -229,8 +109,8 @@ fn writeFunction(file: *std.fs.File, name: [:0]const u8, op: AtomicOp, n: N, bod
\\ \\
, .{ .reg = n.register(), .ty = n.toBits() }) , .{ .reg = n.register(), .ty = n.toBits() })
else else
try std.fmt.bufPrintZ(&constraint_buf, try std.fmt.allocPrint(arena,
\\ : [ret] "={{{[reg]s}0}}" (-> u{[ty]s}), \\ : [ret] "={{{[reg]s}0}}" (-> u{[ty]d}),
\\ : [expected] "{{{[reg]s}0}}" (expected), \\ : [expected] "{{{[reg]s}0}}" (expected),
\\ [desired] "{{{[reg]s}1}}" (desired), \\ [desired] "{{{[reg]s}1}}" (desired),
\\ [ptr] "{{x2}}" (ptr), \\ [ptr] "{{x2}}" (ptr),
@ -239,55 +119,12 @@ fn writeFunction(file: *std.fs.File, name: [:0]const u8, op: AtomicOp, n: N, bod
\\ \\
, .{ .reg = n.register(), .ty = n.toBits() }); , .{ .reg = n.register(), .ty = n.toBits() });
try file.writeAll(constraints); try w.writeAll(constraints);
try file.writeAll( try w.writeAll(
\\ ); \\ );
\\ \\
); );
try file.writeAll("}\n"); try w.writeAll("}\n");
}
fn writeExport(file: *std.fs.File, pat: [:0]const u8, n: [:0]const u8, order: Ordering) !void {
var darwin_lse_buf: [100:0]u8 = undefined;
var darwin_nolse_buf: [100:0]u8 = undefined;
var nodarwin_lse_buf: [100:0]u8 = undefined;
var nodarwin_nolse_buf: [100:0]u8 = undefined;
var name_buf: [100:0]u8 = undefined;
const darwin_lse = try std.fmt.bufPrintZ(&darwin_lse_buf, "{s}{s}{s}DarwinLse", .{ pat, n, order.capName() });
const darwin_nolse = try std.fmt.bufPrintZ(&darwin_nolse_buf, "{s}{s}{s}DarwinNolse", .{ pat, n, order.capName() });
const nodarwin_lse = try std.fmt.bufPrintZ(&nodarwin_lse_buf, "{s}{s}{s}NondarwinLse", .{ pat, n, order.capName() });
const nodarwin_nolse = try std.fmt.bufPrintZ(&nodarwin_nolse_buf, "{s}{s}{s}NondarwinNolse", .{ pat, n, order.capName() });
const name = try std.fmt.bufPrintZ(&name_buf, "__aarch64_{s}{s}_{s}", .{ pat, n, @tagName(order) });
try file.writeAll("const ");
try file.writeAll(name);
try file.writeAll(
\\ = if (is_darwin)
\\ if (has_lse)
\\
);
try file.writeAll(darwin_lse);
try file.writeAll(
\\
\\ else
\\
);
try file.writeAll(darwin_nolse);
try file.writeAll(
\\
\\else if (has_lse)
\\
);
try file.writeAll(nodarwin_lse);
try file.writeAll(
\\
\\else
\\
);
try file.writeAll(nodarwin_nolse);
try file.writeAll(
\\;
\\
);
} }
const N = enum(u8) { const N = enum(u8) {
@ -298,22 +135,23 @@ const N = enum(u8) {
sixteen = 16, sixteen = 16,
const Defines = struct { const Defines = struct {
s: [:0]const u8, s: []const u8,
uxt: [:0]const u8, uxt: []const u8,
b: [:0]const u8, b: []const u8,
}; };
fn defines(self: @This()) Defines {
const s = switch (self) { fn defines(n: N) Defines {
const s = switch (n) {
.one => "b", .one => "b",
.two => "h", .two => "h",
else => "", else => "",
}; };
const uxt = switch (self) { const uxt = switch (n) {
.one => "uxtb", .one => "uxtb",
.two => "uxth", .two => "uxth",
.four, .eight, .sixteen => "mov", .four, .eight, .sixteen => "mov",
}; };
const b = switch (self) { const b = switch (n) {
.one => "0x00000000", .one => "0x00000000",
.two => "0x40000000", .two => "0x40000000",
.four => "0x80000000", .four => "0x80000000",
@ -327,28 +165,16 @@ const N = enum(u8) {
}; };
} }
fn register(self: @This()) [:0]const u8 { fn register(n: N) []const u8 {
return if (@enumToInt(self) < 8) "w" else "x"; return if (@enumToInt(n) < 8) "w" else "x";
} }
fn toBytes(self: @This()) [:0]const u8 { fn toBytes(n: N) u8 {
return switch (self) { return @enumToInt(n);
.one => "1",
.two => "2",
.four => "4",
.eight => "8",
.sixteen => "16",
};
} }
fn toBits(self: @This()) [:0]const u8 { fn toBits(n: N) u8 {
return switch (self) { return n.toBytes() * 8;
.one => "8",
.two => "16",
.four => "32",
.eight => "64",
.sixteen => "128",
};
} }
}; };
@ -411,31 +237,18 @@ const Ordering = enum {
const LdName = enum { ldadd, ldclr, ldeor, ldset }; const LdName = enum { ldadd, ldclr, ldeor, ldset };
fn generateCas(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u8 { fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
const s_def = n.defines(); const s_def = n.defines();
const o_def = order.defines(); const o_def = order.defines();
var cas_buf = try alloc.create([200:0]u8);
var ldxr_buf = try alloc.create([200:0]u8);
var stxr_buf = try alloc.create([200:0]u8);
defer alloc.destroy(cas_buf);
defer alloc.destroy(ldxr_buf);
defer alloc.destroy(stxr_buf);
var instr_buf = try alloc.create([1000:0]u8);
errdefer alloc.destroy(instr_buf);
const reg = n.register(); const reg = n.register();
if (@enumToInt(n) < 16) { if (@enumToInt(n) < 16) {
const cas = if (lse) blk: { const cas = try std.fmt.allocPrint(arena, ".inst 0x08a07c41 + {s} + {s}\n", .{ s_def.b, o_def.m });
break :blk try std.fmt.bufPrintZ(cas_buf, const ldxr = try std.fmt.allocPrint(arena, "ld{s}xr{s}", .{ o_def.a, s_def.s });
\\cas{[a]s}{[l]s}{[s]s} {[reg]s}0, {[reg]s}1, [x2] const stxr = try std.fmt.allocPrint(arena, "st{s}xr{s}", .{ o_def.l, s_def.s });
\\
, .{ .a = o_def.a, .l = o_def.l, .s = s_def.s, .reg = reg });
} else try std.fmt.bufPrintZ(cas_buf, ".inst 0x08a07c41 + {s} + {s}\n", .{ s_def.b, o_def.m });
const ldxr = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xr{s}", .{ o_def.a, s_def.s });
const stxr = try std.fmt.bufPrintZ(stxr_buf, "st{s}xr{s}", .{ o_def.l, s_def.s });
return try std.fmt.bufPrintZ(instr_buf, return try std.fmt.allocPrint(arena,
\\ cbz w16, 8f \\ cbz w16, 8f
\\ {[cas]s} \\ {[cas]s}
\\ cbz wzr, 1f \\ cbz wzr, 1f
@ -456,15 +269,11 @@ fn generateCas(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u
.reg = reg, .reg = reg,
}); });
} else { } else {
const casp = if (lse) const casp = try std.fmt.allocPrint(arena, ".inst 0x48207c82 + {s}\n", .{o_def.m});
try std.fmt.bufPrintZ(cas_buf, "casp{s}{s} x0, x1, x2, x3, [x4]\n", .{ o_def.a, o_def.l }) const ldxp = try std.fmt.allocPrint(arena, "ld{s}xp", .{o_def.a});
else const stxp = try std.fmt.allocPrint(arena, "st{s}xp", .{o_def.l});
try std.fmt.bufPrintZ(cas_buf, ".inst 0x48207c82 + {s}\n", .{o_def.m});
const ldxp = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xp", .{o_def.a}); return try std.fmt.allocPrint(arena,
const stxp = try std.fmt.bufPrintZ(stxr_buf, "st{s}xp", .{o_def.l});
return try std.fmt.bufPrintZ(instr_buf,
\\ cbz w16, 8f \\ cbz w16, 8f
\\ {[casp]s} \\ {[casp]s}
\\ cbz wzr, 1f \\ cbz wzr, 1f
@ -487,53 +296,35 @@ fn generateCas(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u
} }
} }
fn generateSwp(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u8 { fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
const s_def = n.defines(); const s_def = n.defines();
const o_def = order.defines(); const o_def = order.defines();
var swp_buf = try alloc.create([200:0]u8);
var ldxr_buf = try alloc.create([200:0]u8);
var stxr_buf = try alloc.create([200:0]u8);
defer alloc.destroy(swp_buf);
defer alloc.destroy(ldxr_buf);
defer alloc.destroy(stxr_buf);
const reg = n.register(); const reg = n.register();
const swp = if (lse) blk: { return try std.fmt.allocPrint(arena,
break :blk try std.fmt.bufPrintZ(swp_buf,
\\swp{[a]s}{[l]s}{[s]s} {[reg]s}0, {[reg]s}0, [x1]
, .{ .a = o_def.a, .l = o_def.l, .s = s_def.s, .reg = reg });
} else std.fmt.bufPrintZ(swp_buf, ".inst 0x38208020 + {s} + {s}", .{ .b = s_def.b, .n = o_def.n });
const ldxr = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xr{s}", .{ o_def.a, s_def.s });
const stxr = try std.fmt.bufPrintZ(stxr_buf, "st{s}xr{s}", .{ o_def.l, s_def.s });
var instr_buf = try alloc.create([1000:0]u8);
errdefer alloc.destroy(instr_buf);
return try std.fmt.bufPrintZ(instr_buf,
\\ cbz w16, 8f \\ cbz w16, 8f
\\ {[swp]s} \\ .inst 0x38208020 + {[b]s} + {[n]s}
\\ cbz wzr, 1f \\ cbz wzr, 1f
\\8: \\8:
\\ mov {[reg]s}16, {[reg]s}0 \\ mov {[reg]s}16, {[reg]s}0
\\0: \\0:
\\ {[ldxr]s} {[reg]s}0, [x1] \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
\\ {[stxr]s} w17, {[reg]s}16, [x1] \\ st{[l]s}xr{[s]s} w17, {[reg]s}16, [x1]
\\ cbnz w17, 0b \\ cbnz w17, 0b
\\1: \\1:
, .{ , .{
.swp = swp, .b = s_def.b,
.ldxr = ldxr, .n = o_def.n,
.stxr = stxr,
.reg = reg, .reg = reg,
.s = s_def.s,
.a = o_def.a,
.l = o_def.l,
}); });
} }
fn generateLd(alloc: *Allocator, n: N, order: Ordering, ld: LdName, lse: bool) ![:0]const u8 { fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
const s_def = n.defines(); const s_def = n.defines();
const o_def = order.defines(); const o_def = order.defines();
const ldname = @tagName(ld);
const op = switch (ld) { const op = switch (ld) {
.ldadd => "add", .ldadd => "add",
.ldclr => "bic", .ldclr => "bic",
@ -547,47 +338,27 @@ fn generateLd(alloc: *Allocator, n: N, order: Ordering, ld: LdName, lse: bool) !
.ldset => "0x3000", .ldset => "0x3000",
}; };
var swp_buf = try alloc.create([200:0]u8);
var ldop_buf = try alloc.create([200:0]u8);
var ldxr_buf = try alloc.create([200:0]u8);
var stxr_buf = try alloc.create([200:0]u8);
defer alloc.destroy(swp_buf);
defer alloc.destroy(ldop_buf);
defer alloc.destroy(ldxr_buf);
defer alloc.destroy(stxr_buf);
const reg = n.register(); const reg = n.register();
const ldop = if (lse) return try std.fmt.allocPrint(arena,
std.fmt.bufPrintZ(ldop_buf,
\\{[ldnm]s}{[a]s}{[l]s}{[s]s} {[reg]s}0, {[reg]s}0, [x1]
, .{ .ldnm = ldname, .a = o_def.a, .l = o_def.l, .s = s_def.s, .reg = reg })
else
std.fmt.bufPrintZ(ldop_buf,
\\.inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
, .{ .op_n = op_n, .b = s_def.b, .n = o_def.n });
const ldxr = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xr{s}", .{ o_def.a, s_def.s });
const stxr = try std.fmt.bufPrintZ(stxr_buf, "st{s}xr{s}", .{ o_def.l, s_def.s });
var instr_buf = try alloc.create([1000:0]u8);
errdefer alloc.destroy(instr_buf);
return try std.fmt.bufPrintZ(instr_buf,
\\ cbz w16, 8f \\ cbz w16, 8f
\\ {[ldop]s} \\ .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
\\ cbz wzr, 1f \\ cbz wzr, 1f
\\8: \\8:
\\ mov {[reg]s}16, {[reg]s}0 \\ mov {[reg]s}16, {[reg]s}0
\\0: \\0:
\\ {[ldxr]s} {[reg]s}0, [x1] \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
\\ {[op]s} {[reg]s}17, {[reg]s}0, {[reg]s}16 \\ {[op]s} {[reg]s}17, {[reg]s}0, {[reg]s}16
\\ {[stxr]s} w15, {[reg]s}17, [x1] \\ st{[l]s}xr{[s]s} w15, {[reg]s}17, [x1]
\\ cbnz w15, 0b \\ cbnz w15, 0b
\\1: \\1:
, .{ , .{
.ldop = ldop, .op_n = op_n,
.ldxr = ldxr, .b = s_def.b,
.stxr = stxr, .n = o_def.n,
.s = s_def.s,
.a = o_def.a,
.l = o_def.l,
.op = op, .op = op,
.reg = reg, .reg = reg,
}); });