From 2bbaf95ebea577652bcef474c418bf9a31abf4ce Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 21 Nov 2022 19:17:50 -0500
Subject: [PATCH] Merge pull request #11828 from devins2518/arm-atomics

compiler_rt: aarch64 outline atomics
---
 lib/compiler_rt.zig                         |    7 +
 lib/compiler_rt/aarch64_outline_atomics.zig | 2227 +++++++++++++++++++
 tools/gen_outline_atomics.zig               |  338 +++
 3 files changed, 2572 insertions(+)
 create mode 100644 lib/compiler_rt/aarch64_outline_atomics.zig
 create mode 100644 tools/gen_outline_atomics.zig

diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig
index e37fa92789..44146d04e9 100644
--- a/lib/compiler_rt.zig
+++ b/lib/compiler_rt.zig
@@ -1,8 +1,15 @@
+const builtin = @import("builtin");
+
 pub const panic = @import("compiler_rt/common.zig").panic;
 
 comptime {
     _ = @import("compiler_rt/atomics.zig");
 
+    // macOS has these functions inside libSystem.
+    if (builtin.cpu.arch.isAARCH64() and !builtin.os.tag.isDarwin()) {
+        _ = @import("compiler_rt/aarch64_outline_atomics.zig");
+    }
+
     _ = @import("compiler_rt/addf3.zig");
     _ = @import("compiler_rt/addhf3.zig");
     _ = @import("compiler_rt/addsf3.zig");
diff --git a/lib/compiler_rt/aarch64_outline_atomics.zig b/lib/compiler_rt/aarch64_outline_atomics.zig
new file mode 100644
index 0000000000..2471a45365
--- /dev/null
+++ b/lib/compiler_rt/aarch64_outline_atomics.zig
@@ -0,0 +1,2227 @@
+//! This file is generated by tools/gen_outline_atomics.zig.
+const builtin = @import("builtin");
+const std = @import("std");
+const linkage = @import("./common.zig").linkage;
+const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
+
+/// This default is overridden at runtime after inspecting CPU properties.
+/// It is intentionally not exported in order to make the machine code that
+/// uses it a statically predicted direct branch rather than using the PLT,
+/// which ARM is concerned would have too much overhead.
+var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse); + +fn __aarch64_cas1_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x00000000 + 0x000000 + \\ ret + \\8: + \\ uxtb w16, w0 + \\0: + \\ ldxrb w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stxrb w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp1_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x00000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ stxrb w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd1_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x00000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ add w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr1_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x00000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ bic w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor1_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x00000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ eor w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset1_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x00000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ orr w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas1_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x00000000 + 0x400000 + \\ ret + \\8: + \\ uxtb w16, w0 + \\0: + \\ ldaxrb w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stxrb w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp1_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x00000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ stxrb w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" 
(__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd1_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x00000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ add w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr1_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x00000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ bic w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor1_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x00000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ eor w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset1_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x00000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ orr w17, w0, w16 + \\ stxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas1_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x00000000 + 0x008000 + \\ ret + \\8: + \\ uxtb w16, w0 + \\0: + \\ ldxrb w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stlxrb w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp1_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x00000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ stlxrb w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd1_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x00000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ add w17, w0, w16 + \\ stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr1_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x00000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ bic w17, w0, w16 + \\ stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : 
[__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor1_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x00000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ eor w17, w0, w16 + \\ stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset1_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x00000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrb w0, [x1] + \\ orr w17, w0, w16 + \\ stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas1_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x00000000 + 0x408000 + \\ ret + \\8: + \\ uxtb w16, w0 + \\0: + \\ ldaxrb w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stlxrb w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp1_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x00000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ stlxrb w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd1_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x00000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ add w17, w0, w16 + \\ stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr1_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x00000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ bic w17, w0, w16 + \\ stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor1_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x00000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ eor w17, w0, w16 + \\ stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset1_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x00000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrb w0, [x1] + \\ orr w17, w0, w16 + \\ 
stlxrb w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas2_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x40000000 + 0x000000 + \\ ret + \\8: + \\ uxth w16, w0 + \\0: + \\ ldxrh w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stxrh w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp2_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x40000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ stxrh w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd2_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x40000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ add w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr2_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x40000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ bic w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor2_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x40000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ eor w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset2_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x40000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ orr w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas2_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x40000000 + 0x400000 + \\ ret + \\8: + \\ uxth w16, w0 + \\0: + \\ ldaxrh w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stxrh w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp2_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x40000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + 
\\ stxrh w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd2_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x40000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ add w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr2_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x40000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ bic w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor2_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x40000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ eor w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset2_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x40000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ orr w17, w0, w16 + \\ stxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas2_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x40000000 + 0x008000 + \\ ret + \\8: + \\ uxth w16, w0 + \\0: + \\ ldxrh w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stlxrh w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp2_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x40000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ stlxrh w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd2_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x40000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ add w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr2_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x40000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh 
w0, [x1] + \\ bic w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor2_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x40000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ eor w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset2_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x40000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxrh w0, [x1] + \\ orr w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas2_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x40000000 + 0x408000 + \\ ret + \\8: + \\ uxth w16, w0 + \\0: + \\ ldaxrh w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stlxrh w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp2_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x40000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ stlxrh w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd2_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x40000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ add w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr2_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x40000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ bic w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor2_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x40000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ eor w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset2_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x40000000 
+ 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxrh w0, [x1] + \\ orr w17, w0, w16 + \\ stlxrh w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas4_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x80000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stxr w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp4_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x80000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ stxr w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd4_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x80000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ add w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr4_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x80000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ bic w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor4_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x80000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ eor w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset4_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x80000000 + 0x000000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ orr w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas4_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x80000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stxr w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp4_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 
0x80000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ stxr w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd4_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x80000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ add w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr4_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x80000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ bic w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor4_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x80000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ eor w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset4_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x80000000 + 0x800000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ orr w17, w0, w16 + \\ stxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas4_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x80000000 + 0x008000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stlxr w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp4_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x80000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ stlxr w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd4_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x80000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ add w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr4_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 
0x80000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ bic w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor4_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x80000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ eor w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset4_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x80000000 + 0x400000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldxr w0, [x1] + \\ orr w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas4_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0x80000000 + 0x408000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x2] + \\ cmp w0, w16 + \\ bne 1f + \\ stlxr w17, w1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp4_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0x80000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ stlxr w17, w16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd4_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0x80000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ add w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr4_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0x80000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ bic w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor4_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0x80000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ eor w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset4_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ 
cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0x80000000 + 0xc00000 + \\ ret + \\8: + \\ mov w16, w0 + \\0: + \\ ldaxr w0, [x1] + \\ orr w17, w0, w16 + \\ stlxr w15, w17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas8_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0xc0000000 + 0x000000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x2] + \\ cmp x0, x16 + \\ bne 1f + \\ stxr w17, x1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp8_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0xc0000000 + 0x000000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ stxr w17, x16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd8_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x000000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ add x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr8_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x000000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ bic x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor8_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x000000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ eor x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset8_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x000000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ orr x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas8_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0xc0000000 + 0x400000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x2] + \\ cmp x0, x16 + \\ bne 1f + \\ stxr w17, x1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp8_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm 
volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0xc0000000 + 0x800000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ stxr w17, x16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd8_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x800000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ add x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr8_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x800000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ bic x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor8_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x800000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ eor x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset8_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x800000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ orr x17, x0, x16 + \\ stxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas8_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0xc0000000 + 0x008000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x2] + \\ cmp x0, x16 + \\ bne 1f + \\ stlxr w17, x1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp8_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0xc0000000 + 0x400000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ stlxr w17, x16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd8_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x400000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ add x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr8_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ 
cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x400000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ bic x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor8_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x400000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ eor x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset8_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x400000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldxr x0, [x1] + \\ orr x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas8_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x08a07c41 + 0xc0000000 + 0x408000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x2] + \\ cmp x0, x16 + \\ bne 1f + \\ stlxr w17, x1, [x2] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_swp8_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38208020 + 0xc0000000 + 0xc00000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ stlxr w17, x16, [x1] + \\ cbnz w17, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldadd8_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x0000 + 0xc0000000 + 0xc00000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ add x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldclr8_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x1000 + 0xc0000000 + 0xc00000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ bic x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldeor8_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x2000 + 0xc0000000 + 0xc00000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ eor x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_ldset8_acq_rel() align(16) callconv(.Naked) void { + 
@setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x38200020 + 0x3000 + 0xc0000000 + 0xc00000 + \\ ret + \\8: + \\ mov x16, x0 + \\0: + \\ ldaxr x0, [x1] + \\ orr x17, x0, x16 + \\ stlxr w15, x17, [x1] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas16_relax() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x48207c82 + 0x000000 + \\ ret + \\8: + \\ mov x16, x0 + \\ mov x17, x1 + \\0: + \\ ldxp x0, x1, [x4] + \\ cmp x0, x16 + \\ ccmp x1, x17, #0, eq + \\ bne 1f + \\ stxp w15, x2, x3, [x4] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas16_acq() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x48207c82 + 0x400000 + \\ ret + \\8: + \\ mov x16, x0 + \\ mov x17, x1 + \\0: + \\ ldaxp x0, x1, [x4] + \\ cmp x0, x16 + \\ ccmp x1, x17, #0, eq + \\ bne 1f + \\ stxp w15, x2, x3, [x4] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas16_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x48207c82 + 0x008000 + \\ ret + \\8: + \\ mov x16, x0 + \\ mov x17, x1 + \\0: + \\ ldxp x0, x1, [x4] + \\ cmp x0, x16 + \\ ccmp x1, x17, #0, eq + \\ bne 1f + \\ stlxp w15, x2, x3, [x4] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} +fn __aarch64_cas16_acq_rel() align(16) callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( + \\ cbz w16, 8f + \\ .inst 0x48207c82 + 0x408000 + \\ ret + \\8: + \\ mov x16, x0 + \\ mov x17, x1 + \\0: + \\ ldaxp x0, x1, [x4] + \\ cmp x0, x16 + \\ ccmp x1, x17, #0, eq + \\ bne 1f + \\ stlxp w15, x2, x3, [x4] + \\ cbnz w15, 0b + \\1: + \\ ret + : + : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics), + : "w15", "w16", "w17", "memory" + ); + unreachable; +} + +comptime { + @export(__aarch64_cas1_relax, .{ .name = "__aarch64_cas1_relax", .linkage = linkage }); + @export(__aarch64_swp1_relax, .{ .name = "__aarch64_swp1_relax", .linkage = linkage }); + @export(__aarch64_ldadd1_relax, .{ .name = "__aarch64_ldadd1_relax", .linkage = linkage }); + @export(__aarch64_ldclr1_relax, .{ .name = "__aarch64_ldclr1_relax", .linkage = linkage }); + @export(__aarch64_ldeor1_relax, .{ .name = "__aarch64_ldeor1_relax", .linkage = linkage }); + @export(__aarch64_ldset1_relax, .{ .name = "__aarch64_ldset1_relax", .linkage = linkage }); + @export(__aarch64_cas1_acq, .{ .name = "__aarch64_cas1_acq", .linkage = linkage }); + @export(__aarch64_swp1_acq, .{ .name = "__aarch64_swp1_acq", .linkage = linkage }); + @export(__aarch64_ldadd1_acq, .{ .name = "__aarch64_ldadd1_acq", .linkage = linkage }); + @export(__aarch64_ldclr1_acq, .{ .name = "__aarch64_ldclr1_acq", .linkage = linkage }); + @export(__aarch64_ldeor1_acq, .{ .name = "__aarch64_ldeor1_acq", .linkage = linkage }); + @export(__aarch64_ldset1_acq, .{ .name = "__aarch64_ldset1_acq", .linkage = linkage }); + @export(__aarch64_cas1_rel, .{ .name = "__aarch64_cas1_rel", .linkage = linkage }); + @export(__aarch64_swp1_rel, 
.{ .name = "__aarch64_swp1_rel", .linkage = linkage }); + @export(__aarch64_ldadd1_rel, .{ .name = "__aarch64_ldadd1_rel", .linkage = linkage }); + @export(__aarch64_ldclr1_rel, .{ .name = "__aarch64_ldclr1_rel", .linkage = linkage }); + @export(__aarch64_ldeor1_rel, .{ .name = "__aarch64_ldeor1_rel", .linkage = linkage }); + @export(__aarch64_ldset1_rel, .{ .name = "__aarch64_ldset1_rel", .linkage = linkage }); + @export(__aarch64_cas1_acq_rel, .{ .name = "__aarch64_cas1_acq_rel", .linkage = linkage }); + @export(__aarch64_swp1_acq_rel, .{ .name = "__aarch64_swp1_acq_rel", .linkage = linkage }); + @export(__aarch64_ldadd1_acq_rel, .{ .name = "__aarch64_ldadd1_acq_rel", .linkage = linkage }); + @export(__aarch64_ldclr1_acq_rel, .{ .name = "__aarch64_ldclr1_acq_rel", .linkage = linkage }); + @export(__aarch64_ldeor1_acq_rel, .{ .name = "__aarch64_ldeor1_acq_rel", .linkage = linkage }); + @export(__aarch64_ldset1_acq_rel, .{ .name = "__aarch64_ldset1_acq_rel", .linkage = linkage }); + @export(__aarch64_cas2_relax, .{ .name = "__aarch64_cas2_relax", .linkage = linkage }); + @export(__aarch64_swp2_relax, .{ .name = "__aarch64_swp2_relax", .linkage = linkage }); + @export(__aarch64_ldadd2_relax, .{ .name = "__aarch64_ldadd2_relax", .linkage = linkage }); + @export(__aarch64_ldclr2_relax, .{ .name = "__aarch64_ldclr2_relax", .linkage = linkage }); + @export(__aarch64_ldeor2_relax, .{ .name = "__aarch64_ldeor2_relax", .linkage = linkage }); + @export(__aarch64_ldset2_relax, .{ .name = "__aarch64_ldset2_relax", .linkage = linkage }); + @export(__aarch64_cas2_acq, .{ .name = "__aarch64_cas2_acq", .linkage = linkage }); + @export(__aarch64_swp2_acq, .{ .name = "__aarch64_swp2_acq", .linkage = linkage }); + @export(__aarch64_ldadd2_acq, .{ .name = "__aarch64_ldadd2_acq", .linkage = linkage }); + @export(__aarch64_ldclr2_acq, .{ .name = "__aarch64_ldclr2_acq", .linkage = linkage }); + @export(__aarch64_ldeor2_acq, .{ .name = "__aarch64_ldeor2_acq", .linkage = linkage }); + @export(__aarch64_ldset2_acq, .{ .name = "__aarch64_ldset2_acq", .linkage = linkage }); + @export(__aarch64_cas2_rel, .{ .name = "__aarch64_cas2_rel", .linkage = linkage }); + @export(__aarch64_swp2_rel, .{ .name = "__aarch64_swp2_rel", .linkage = linkage }); + @export(__aarch64_ldadd2_rel, .{ .name = "__aarch64_ldadd2_rel", .linkage = linkage }); + @export(__aarch64_ldclr2_rel, .{ .name = "__aarch64_ldclr2_rel", .linkage = linkage }); + @export(__aarch64_ldeor2_rel, .{ .name = "__aarch64_ldeor2_rel", .linkage = linkage }); + @export(__aarch64_ldset2_rel, .{ .name = "__aarch64_ldset2_rel", .linkage = linkage }); + @export(__aarch64_cas2_acq_rel, .{ .name = "__aarch64_cas2_acq_rel", .linkage = linkage }); + @export(__aarch64_swp2_acq_rel, .{ .name = "__aarch64_swp2_acq_rel", .linkage = linkage }); + @export(__aarch64_ldadd2_acq_rel, .{ .name = "__aarch64_ldadd2_acq_rel", .linkage = linkage }); + @export(__aarch64_ldclr2_acq_rel, .{ .name = "__aarch64_ldclr2_acq_rel", .linkage = linkage }); + @export(__aarch64_ldeor2_acq_rel, .{ .name = "__aarch64_ldeor2_acq_rel", .linkage = linkage }); + @export(__aarch64_ldset2_acq_rel, .{ .name = "__aarch64_ldset2_acq_rel", .linkage = linkage }); + @export(__aarch64_cas4_relax, .{ .name = "__aarch64_cas4_relax", .linkage = linkage }); + @export(__aarch64_swp4_relax, .{ .name = "__aarch64_swp4_relax", .linkage = linkage }); + @export(__aarch64_ldadd4_relax, .{ .name = "__aarch64_ldadd4_relax", .linkage = linkage }); + @export(__aarch64_ldclr4_relax, .{ .name = "__aarch64_ldclr4_relax", .linkage = 
linkage }); + @export(__aarch64_ldeor4_relax, .{ .name = "__aarch64_ldeor4_relax", .linkage = linkage }); + @export(__aarch64_ldset4_relax, .{ .name = "__aarch64_ldset4_relax", .linkage = linkage }); + @export(__aarch64_cas4_acq, .{ .name = "__aarch64_cas4_acq", .linkage = linkage }); + @export(__aarch64_swp4_acq, .{ .name = "__aarch64_swp4_acq", .linkage = linkage }); + @export(__aarch64_ldadd4_acq, .{ .name = "__aarch64_ldadd4_acq", .linkage = linkage }); + @export(__aarch64_ldclr4_acq, .{ .name = "__aarch64_ldclr4_acq", .linkage = linkage }); + @export(__aarch64_ldeor4_acq, .{ .name = "__aarch64_ldeor4_acq", .linkage = linkage }); + @export(__aarch64_ldset4_acq, .{ .name = "__aarch64_ldset4_acq", .linkage = linkage }); + @export(__aarch64_cas4_rel, .{ .name = "__aarch64_cas4_rel", .linkage = linkage }); + @export(__aarch64_swp4_rel, .{ .name = "__aarch64_swp4_rel", .linkage = linkage }); + @export(__aarch64_ldadd4_rel, .{ .name = "__aarch64_ldadd4_rel", .linkage = linkage }); + @export(__aarch64_ldclr4_rel, .{ .name = "__aarch64_ldclr4_rel", .linkage = linkage }); + @export(__aarch64_ldeor4_rel, .{ .name = "__aarch64_ldeor4_rel", .linkage = linkage }); + @export(__aarch64_ldset4_rel, .{ .name = "__aarch64_ldset4_rel", .linkage = linkage }); + @export(__aarch64_cas4_acq_rel, .{ .name = "__aarch64_cas4_acq_rel", .linkage = linkage }); + @export(__aarch64_swp4_acq_rel, .{ .name = "__aarch64_swp4_acq_rel", .linkage = linkage }); + @export(__aarch64_ldadd4_acq_rel, .{ .name = "__aarch64_ldadd4_acq_rel", .linkage = linkage }); + @export(__aarch64_ldclr4_acq_rel, .{ .name = "__aarch64_ldclr4_acq_rel", .linkage = linkage }); + @export(__aarch64_ldeor4_acq_rel, .{ .name = "__aarch64_ldeor4_acq_rel", .linkage = linkage }); + @export(__aarch64_ldset4_acq_rel, .{ .name = "__aarch64_ldset4_acq_rel", .linkage = linkage }); + @export(__aarch64_cas8_relax, .{ .name = "__aarch64_cas8_relax", .linkage = linkage }); + @export(__aarch64_swp8_relax, .{ .name = "__aarch64_swp8_relax", .linkage = linkage }); + @export(__aarch64_ldadd8_relax, .{ .name = "__aarch64_ldadd8_relax", .linkage = linkage }); + @export(__aarch64_ldclr8_relax, .{ .name = "__aarch64_ldclr8_relax", .linkage = linkage }); + @export(__aarch64_ldeor8_relax, .{ .name = "__aarch64_ldeor8_relax", .linkage = linkage }); + @export(__aarch64_ldset8_relax, .{ .name = "__aarch64_ldset8_relax", .linkage = linkage }); + @export(__aarch64_cas8_acq, .{ .name = "__aarch64_cas8_acq", .linkage = linkage }); + @export(__aarch64_swp8_acq, .{ .name = "__aarch64_swp8_acq", .linkage = linkage }); + @export(__aarch64_ldadd8_acq, .{ .name = "__aarch64_ldadd8_acq", .linkage = linkage }); + @export(__aarch64_ldclr8_acq, .{ .name = "__aarch64_ldclr8_acq", .linkage = linkage }); + @export(__aarch64_ldeor8_acq, .{ .name = "__aarch64_ldeor8_acq", .linkage = linkage }); + @export(__aarch64_ldset8_acq, .{ .name = "__aarch64_ldset8_acq", .linkage = linkage }); + @export(__aarch64_cas8_rel, .{ .name = "__aarch64_cas8_rel", .linkage = linkage }); + @export(__aarch64_swp8_rel, .{ .name = "__aarch64_swp8_rel", .linkage = linkage }); + @export(__aarch64_ldadd8_rel, .{ .name = "__aarch64_ldadd8_rel", .linkage = linkage }); + @export(__aarch64_ldclr8_rel, .{ .name = "__aarch64_ldclr8_rel", .linkage = linkage }); + @export(__aarch64_ldeor8_rel, .{ .name = "__aarch64_ldeor8_rel", .linkage = linkage }); + @export(__aarch64_ldset8_rel, .{ .name = "__aarch64_ldset8_rel", .linkage = linkage }); + @export(__aarch64_cas8_acq_rel, .{ .name = "__aarch64_cas8_acq_rel", .linkage = linkage 
});
+    @export(__aarch64_swp8_acq_rel, .{ .name = "__aarch64_swp8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd8_acq_rel, .{ .name = "__aarch64_ldadd8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr8_acq_rel, .{ .name = "__aarch64_ldclr8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor8_acq_rel, .{ .name = "__aarch64_ldeor8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset8_acq_rel, .{ .name = "__aarch64_ldset8_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas16_relax, .{ .name = "__aarch64_cas16_relax", .linkage = linkage });
+    @export(__aarch64_cas16_acq, .{ .name = "__aarch64_cas16_acq", .linkage = linkage });
+    @export(__aarch64_cas16_rel, .{ .name = "__aarch64_cas16_rel", .linkage = linkage });
+    @export(__aarch64_cas16_acq_rel, .{ .name = "__aarch64_cas16_acq_rel", .linkage = linkage });
+}
diff --git a/tools/gen_outline_atomics.zig b/tools/gen_outline_atomics.zig
new file mode 100644
index 0000000000..c04591d032
--- /dev/null
+++ b/tools/gen_outline_atomics.zig
@@ -0,0 +1,338 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+const AtomicOp = enum {
+    cas,
+    swp,
+    ldadd,
+    ldclr,
+    ldeor,
+    ldset,
+};
+
+pub fn main() !void {
+    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena_instance.deinit();
+    const arena = arena_instance.allocator();
+
+    //const args = try std.process.argsAlloc(arena);
+
+    var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
+    const w = bw.writer();
+
+    try w.writeAll(
+        \\//! This file is generated by tools/gen_outline_atomics.zig.
+        \\const builtin = @import("builtin");
+        \\const std = @import("std");
+        \\const linkage = @import("./common.zig").linkage;
+        \\const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
+        \\
+        \\/// This default is overridden at runtime after inspecting CPU properties.
+        \\/// It is intentionally not exported in order to make the machine code that
+        \\/// uses it a statically predicted direct branch rather than using the PLT,
+        \\/// which ARM is concerned would have too much overhead.
+        \\var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
+        \\
+        \\
+    );
+
+    var footer = std.ArrayList(u8).init(arena);
+    try footer.appendSlice("\ncomptime {\n");
+
+    for ([_]N{ .one, .two, .four, .eight, .sixteen }) |n| {
+        for ([_]Ordering{ .relax, .acq, .rel, .acq_rel }) |order| {
+            for ([_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset }) |op| {
+                if (n == .sixteen and op != .cas) continue;
+
+                const name = try std.fmt.allocPrint(arena, "__aarch64_{s}{d}_{s}", .{
+                    @tagName(op), n.toBytes(), @tagName(order),
+                });
+                try writeFunction(arena, w, name, op, n, order);
+                try footer.writer().print("    @export({s}, .{{ .name = \"{s}\", .linkage = linkage }});\n", .{
+                    name, name,
+                });
+            }
+        }
+    }
+
+    try w.writeAll(footer.items);
+    try w.writeAll("}\n");
+    try bw.flush();
+}
+
+fn writeFunction(
+    arena: Allocator,
+    w: anytype,
+    name: []const u8,
+    op: AtomicOp,
+    n: N,
+    order: Ordering,
+) !void {
+    const body = switch (op) {
+        .cas => try generateCas(arena, n, order),
+        .swp => try generateSwp(arena, n, order),
+        .ldadd => try generateLd(arena, n, order, .ldadd),
+        .ldclr => try generateLd(arena, n, order, .ldclr),
+        .ldeor => try generateLd(arena, n, order, .ldeor),
+        .ldset => try generateLd(arena, n, order, .ldset),
+    };
+    const fn_sig = try std.fmt.allocPrint(
+        arena,
+        "fn {[name]s}() align(16) callconv(.Naked) void {{",
+        .{ .name = name },
+    );
+    try w.writeAll(fn_sig);
+    try w.writeAll(
+        \\
+        \\    @setRuntimeSafety(false);
+        \\    asm volatile (
+        \\
+    );
+    var iter = std.mem.split(u8, body, "\n");
+    while (iter.next()) |line| {
+        try w.writeAll("        \\\\");
+        try w.writeAll(line);
+        try w.writeAll("\n");
+    }
+    try w.writeAll(
+        \\        :
+        \\        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        \\        : "w15", "w16", "w17", "memory"
+        \\    );
+        \\    unreachable;
+        \\}
+        \\
+    );
+}
+
+const N = enum(u8) {
+    one = 1,
+    two = 2,
+    four = 4,
+    eight = 8,
+    sixteen = 16,
+
+    const Defines = struct {
+        s: []const u8,
+        uxt: []const u8,
+        b: []const u8,
+    };
+
+    fn defines(n: N) Defines {
+        const s = switch (n) {
+            .one => "b",
+            .two => "h",
+            else => "",
+        };
+        const uxt = switch (n) {
+            .one => "uxtb",
+            .two => "uxth",
+            .four, .eight, .sixteen => "mov",
+        };
+        const b = switch (n) {
+            .one => "0x00000000",
+            .two => "0x40000000",
+            .four => "0x80000000",
+            .eight => "0xc0000000",
+            else => "0x00000000",
+        };
+        return Defines{
+            .s = s,
+            .uxt = uxt,
+            .b = b,
+        };
+    }
+
+    fn register(n: N) []const u8 {
+        return if (@enumToInt(n) < 8) "w" else "x";
+    }
+
+    fn toBytes(n: N) u8 {
+        return @enumToInt(n);
+    }
+
+    fn toBits(n: N) u8 {
+        return n.toBytes() * 8;
+    }
+};
+
+const Ordering = enum {
+    relax,
+    acq,
+    rel,
+    acq_rel,
+
+    const Defines = struct {
+        suff: []const u8,
+        a: []const u8,
+        l: []const u8,
+        m: []const u8,
+        n: []const u8,
+    };
+    fn defines(self: @This()) Defines {
+        const suff = switch (self) {
+            .relax => "_relax",
+            .acq => "_acq",
+            .rel => "_rel",
+            .acq_rel => "_acq_rel",
+        };
+        const a = switch (self) {
+            .relax => "",
+            .acq => "a",
+            .rel => "",
+            .acq_rel => "a",
+        };
+        const l = switch (self) {
+            .relax => "",
+            .acq => "",
+            .rel => "l",
+            .acq_rel => "l",
+        };
+        const m = switch (self) {
+            .relax => "0x000000",
+            .acq => "0x400000",
+            .rel => "0x008000",
+            .acq_rel => "0x408000",
+        };
+        const n = switch (self) {
+            .relax => "0x000000",
+            .acq => "0x800000",
+            .rel => "0x400000",
+            .acq_rel => "0xc00000",
+        };
+        return .{ .suff = suff, .a = a, .l = l, .m = m, .n = n };
+    }
+};
+
+const LdName = enum { ldadd, ldclr, ldeor, ldset };
+
+fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
+    const s_def = n.defines();
+    const o_def = order.defines();
+
+    const reg = n.register();
+
+    if (@enumToInt(n) < 16) {
+        const cas = try std.fmt.allocPrint(arena, ".inst 0x08a07c41 + {s} + {s}", .{ s_def.b, o_def.m });
+        const ldxr = try std.fmt.allocPrint(arena, "ld{s}xr{s}", .{ o_def.a, s_def.s });
+        const stxr = try std.fmt.allocPrint(arena, "st{s}xr{s}", .{ o_def.l, s_def.s });
+
+        return try std.fmt.allocPrint(arena,
+            \\ cbz w16, 8f
+            \\ {[cas]s}
+            \\ ret
+            \\8:
+            \\ {[uxt]s} {[reg]s}16, {[reg]s}0
+            \\0:
+            \\ {[ldxr]s} {[reg]s}0, [x2]
+            \\ cmp {[reg]s}0, {[reg]s}16
+            \\ bne 1f
+            \\ {[stxr]s} w17, {[reg]s}1, [x2]
+            \\ cbnz w17, 0b
+            \\1:
+            \\ ret
+        , .{
+            .cas = cas,
+            .uxt = s_def.uxt,
+            .ldxr = ldxr,
+            .stxr = stxr,
+            .reg = reg,
+        });
+    } else {
+        const casp = try std.fmt.allocPrint(arena, ".inst 0x48207c82 + {s}", .{o_def.m});
+        const ldxp = try std.fmt.allocPrint(arena, "ld{s}xp", .{o_def.a});
+        const stxp = try std.fmt.allocPrint(arena, "st{s}xp", .{o_def.l});
+
+        return try std.fmt.allocPrint(arena,
+            \\ cbz w16, 8f
+            \\ {[casp]s}
+            \\ ret
+            \\8:
+            \\ mov x16, x0
+            \\ mov x17, x1
+            \\0:
+            \\ {[ldxp]s} x0, x1, [x4]
+            \\ cmp x0, x16
+            \\ ccmp x1, x17, #0, eq
+            \\ bne 1f
+            \\ {[stxp]s} w15, x2, x3, [x4]
+            \\ cbnz w15, 0b
+            \\1:
+            \\ ret
+        , .{
+            .casp = casp,
+            .ldxp = ldxp,
+            .stxp = stxp,
+        });
+    }
+}
+
+fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
+    const s_def = n.defines();
+    const o_def = order.defines();
+    const reg = n.register();
+
+    return try std.fmt.allocPrint(arena,
+        \\ cbz w16, 8f
+        \\ .inst 0x38208020 + {[b]s} + {[n]s}
+        \\ ret
+        \\8:
+        \\ mov {[reg]s}16, {[reg]s}0
+        \\0:
+        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
+        \\ st{[l]s}xr{[s]s} w17, {[reg]s}16, [x1]
+        \\ cbnz w17, 0b
+        \\1:
+        \\ ret
+    , .{
+        .b = s_def.b,
+        .n = o_def.n,
+        .reg = reg,
+        .s = s_def.s,
+        .a = o_def.a,
+        .l = o_def.l,
+    });
+}
+
+fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
+    const s_def = n.defines();
+    const o_def = order.defines();
+    const op = switch (ld) {
+        .ldadd => "add",
+        .ldclr => "bic",
+        .ldeor => "eor",
+        .ldset => "orr",
+    };
+    const op_n = switch (ld) {
+        .ldadd => "0x0000",
+        .ldclr => "0x1000",
+        .ldeor => "0x2000",
+        .ldset => "0x3000",
+    };
+
+    const reg = n.register();
+
+    return try std.fmt.allocPrint(arena,
+        \\ cbz w16, 8f
+        \\ .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
+        \\ ret
+        \\8:
+        \\ mov {[reg]s}16, {[reg]s}0
+        \\0:
+        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
+        \\ {[op]s} {[reg]s}17, {[reg]s}0, {[reg]s}16
+        \\ st{[l]s}xr{[s]s} w15, {[reg]s}17, [x1]
+        \\ cbnz w15, 0b
+        \\1:
+        \\ ret
+    , .{
+        .op_n = op_n,
+        .b = s_def.b,
+        .n = o_def.n,
+        .s = s_def.s,
+        .a = o_def.a,
+        .l = o_def.l,
+        .op = op,
+        .reg = reg,
+    });
+}
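
Note on how these helpers are used (illustrative; the example below is not part of the patch): when the compiler cannot assume FEAT_LSE at build time, it emits calls to the __aarch64_<op><size>_<order> symbols above instead of inline atomic sequences. Each helper branches on __aarch64_have_lse_atomics: if nonzero it executes the single LSE instruction, encoded as a raw .inst word so the build does not require an LSE-aware assembler; if zero it falls back to a load-exclusive/store-exclusive retry loop. The ABI implied by the asm passes the operand(s) first (w0/x0, plus w1/x1 and x2/x3 for compare-and-swap) and the address last (x1, x2, or x4 depending on the operation), returning the previous memory contents in w0/x0. The "overridden at runtime" in the doc comment refers to startup code setting the flag from the host CPU, e.g. from getauxval(AT_HWCAP) & HWCAP_ATOMICS on Linux.

A minimal sketch of what a call site amounts to, assuming an aarch64 target where this compiler_rt is linked in; the extern declaration and demo function are hypothetical illustrations, not code from this patch:

const std = @import("std");

// Hypothetical declaration matching the ABI implied by the asm above:
// operand in w0, address in x1, previous value returned in w0.
extern fn __aarch64_ldadd4_acq_rel(val: u32, ptr: *u32) u32;

pub fn demo() void {
    var counter: u32 = 41;
    // Roughly what @atomicRmw(u32, &counter, .Add, 1, .AcqRel) lowers to
    // when the backend uses outline atomics; shown as a direct call.
    const prev = __aarch64_ldadd4_acq_rel(1, &counter);
    std.debug.assert(prev == 41 and counter == 42);
}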