From 389d1177a57a442b7814d9fdede2a088c614b69d Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Tue, 4 May 2021 18:43:31 +0200
Subject: [PATCH 1/3] stage1: Fix LLVM error in inline asm invocation

Types that are handled by pointer (such as the array, struct and union
inputs exercised by the new test) need an extra indirection layer during
the generation of the function prototype for inline asm blocks.

Closes #3606
---
 src/stage1/codegen.cpp       |  4 +++-
 test/stage1/behavior/asm.zig | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp
index ede15e4394..015a64f68a 100644
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@@ -4880,6 +4880,9 @@ static LLVMValueRef ir_render_asm_gen(CodeGen *g, IrExecutableGen *executable, I
                 type_ref = get_llvm_type(g, wider_type);
                 value_ref = gen_widen_or_shorten(g, false, type, wider_type, value_ref);
             }
+        } else if (handle_is_ptr(g, type)) {
+            ZigType *gen_type = get_pointer_to_type(g, type, true);
+            type_ref = get_llvm_type(g, gen_type);
         }
 
         param_types[param_index] = type_ref;
@@ -9296,7 +9299,6 @@ static void init(CodeGen *g) {
     char *layout_str = LLVMCopyStringRepOfTargetData(g->target_data_ref);
     LLVMSetDataLayout(g->module, layout_str);
 
-
     assert(g->pointer_size_bytes == LLVMPointerSize(g->target_data_ref));
     g->is_big_endian = (LLVMByteOrder(g->target_data_ref) == LLVMBigEndian);
 
diff --git a/test/stage1/behavior/asm.zig b/test/stage1/behavior/asm.zig
index 170ad3325d..ade774910d 100644
--- a/test/stage1/behavior/asm.zig
+++ b/test/stage1/behavior/asm.zig
@@ -87,6 +87,21 @@ test "sized integer/float in asm input" {
     );
 }
 
+test "struct/array/union types as input values" {
+    asm volatile (""
+        :
+        : [_] "m" (@as([1]u32, undefined))
+    ); // used to fail with an LLVM error (#3606)
+    asm volatile (""
+        :
+        : [_] "m" (@as(struct { x: u32, y: u8 }, undefined))
+    ); // used to fail with an LLVM error (#3606)
+    asm volatile (""
+        :
+        : [_] "m" (@as(union { x: u32, y: u8 }, undefined))
+    ); // used to fail with an LLVM error (#3606)
+}
+
 extern fn this_is_my_alias() i32;
 
 export fn derp() i32 {

From 4bf093f1a00e481d923452955ab9c394c30b8694 Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Tue, 4 May 2021 18:45:52 +0200
Subject: [PATCH 2/3] compiler-rt: Better selection of __clzsi2 implementation

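The target detection logic is becoming hard to maintain. Pick the Thumb1
implementation only when targeting Thumb (or ARM with the `noarm` feature)
without Thumb2 support, the ARM32 one for the remaining ARM targets, and
fall back to the generic one otherwise.
The ARM32 version could possibly be removed, as the generic version
optimizes pretty well...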
---
 lib/std/special/compiler_rt/clzsi2.zig      | 27 +++++++++++++++------
 lib/std/special/compiler_rt/clzsi2_test.zig |  2 ++
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/lib/std/special/compiler_rt/clzsi2.zig b/lib/std/special/compiler_rt/clzsi2.zig
index c10786b462..d7464d5ea9 100644
--- a/lib/std/special/compiler_rt/clzsi2.zig
+++ b/lib/std/special/compiler_rt/clzsi2.zig
@@ -26,6 +26,8 @@ fn __clzsi2_generic(a: i32) callconv(.C) i32 {
 }
 
 fn __clzsi2_thumb1() callconv(.Naked) void {
+    @setRuntimeSafety(false);
+
     // Similar to the generic version with the last two rounds replaced by a LUT
     asm volatile (
         \\ movs r1, #32
@@ -58,6 +60,8 @@ fn __clzsi2_thumb1() callconv(.Naked) void {
 }
 
 fn __clzsi2_arm32() callconv(.Naked) void {
+    @setRuntimeSafety(false);
+
     asm volatile (
         \\ // Assumption: n != 0
         \\ // r0: n
@@ -104,13 +108,22 @@ fn __clzsi2_arm32() callconv(.Naked) void {
     unreachable;
 }
 
-pub const __clzsi2 = switch (std.Target.current.cpu.arch) {
-    .arm, .armeb => if (std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm))
-        __clzsi2_thumb1
-    else
-        __clzsi2_arm32,
-    .thumb, .thumbeb => __clzsi2_thumb1,
-    else => __clzsi2_generic,
+pub const __clzsi2 = impl: {
+    switch (std.Target.current.cpu.arch) {
+        .arm, .armeb, .thumb, .thumbeb => {
+            const use_thumb1 =
+                (std.Target.current.cpu.arch.isThumb() or
+                std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm)) and
+                !std.Target.arm.featureSetHas(std.Target.current.cpu.features, .thumb2);
+
+            if (use_thumb1) break :impl __clzsi2_thumb1
+            // From here on we're either targeting Thumb2 or ARM.
+            else if (!std.Target.current.cpu.arch.isThumb()) break :impl __clzsi2_arm32
+            // Use the generic implementation otherwise.
+            else break :impl __clzsi2_generic;
+        },
+        else => break :impl __clzsi2_generic,
+    }
 };
 
 test "test clzsi2" {
diff --git a/lib/std/special/compiler_rt/clzsi2_test.zig b/lib/std/special/compiler_rt/clzsi2_test.zig
index 2b860afd22..c74a1c3ec2 100644
--- a/lib/std/special/compiler_rt/clzsi2_test.zig
+++ b/lib/std/special/compiler_rt/clzsi2_test.zig
@@ -7,6 +7,8 @@ const clzsi2 = @import("clzsi2.zig");
 const testing = @import("std").testing;
 
 fn test__clzsi2(a: u32, expected: i32) void {
+    // XXX At high optimization levels this test may be horribly miscompiled if
+    // one of the naked implementations is selected.
     var nakedClzsi2 = clzsi2.__clzsi2;
     var actualClzsi2 = @ptrCast(fn (a: i32) callconv(.C) i32, nakedClzsi2);
     var x = @bitCast(i32, a);

From afbcb6209dbe6812679324aab564884085b8cf44 Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Tue, 4 May 2021 18:52:53 +0200
Subject: [PATCH 3/3] std: Initial bringup for Linux on Thumb2

There are still some small problems here and there, mostly due to function
pointers having the LSB set, which disrupts the fn alignment tests and the
`@frameSize` implementation.
---
 lib/std/os/bits/linux.zig         |   2 +-
 lib/std/os/linux.zig              |   1 +
 lib/std/os/linux/thumb.zig        | 168 ++++++++++++++++++++++++++++++
 lib/std/os/linux/tls.zig          |   6 +-
 lib/std/special/c.zig             |   2 +-
 lib/std/start.zig                 |   2 +-
 lib/std/zig/system.zig            |   9 ++
 test/stage1/behavior/align.zig    |   3 +
 test/stage1/behavior/async_fn.zig |   3 +
 test/stage1/behavior/atomics.zig  |   5 +-
 10 files changed, 193 insertions(+), 8 deletions(-)
 create mode 100644 lib/std/os/linux/thumb.zig

diff --git a/lib/std/os/bits/linux.zig b/lib/std/os/bits/linux.zig
index 94da5cc99a..97cdbef782 100644
--- a/lib/std/os/bits/linux.zig
+++ b/lib/std/os/bits/linux.zig
@@ -18,7 +18,7 @@ pub usingnamespace switch (builtin.arch) {
     .i386 => @import("linux/i386.zig"),
     .x86_64 => @import("linux/x86_64.zig"),
     .aarch64 => @import("linux/arm64.zig"),
-    .arm => @import("linux/arm-eabi.zig"),
+    .arm, .thumb => @import("linux/arm-eabi.zig"),
     .riscv64 => @import("linux/riscv64.zig"),
     .sparcv9 => @import("linux/sparc64.zig"),
     .mips, .mipsel => @import("linux/mips.zig"),
diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig
index 4a67ca7685..6c88d9eae1 100644
--- a/lib/std/os/linux.zig
+++ b/lib/std/os/linux.zig
@@ -23,6 +23,7 @@ pub usingnamespace switch (builtin.arch) {
     .x86_64 => @import("linux/x86_64.zig"),
     .aarch64 => @import("linux/arm64.zig"),
     .arm => @import("linux/arm-eabi.zig"),
+    .thumb => @import("linux/thumb.zig"),
     .riscv64 => @import("linux/riscv64.zig"),
     .sparcv9 => @import("linux/sparc64.zig"),
     .mips, .mipsel => @import("linux/mips.zig"),
diff --git a/lib/std/os/linux/thumb.zig b/lib/std/os/linux/thumb.zig
new file mode 100644
index 0000000000..5db9d2cbf4
--- /dev/null
+++ b/lib/std/os/linux/thumb.zig
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.
+usingnamespace @import("../bits.zig");
+
+// The syscall interface is identical to the ARM one but we're facing an extra
+// challenge: r7, the register where the syscall number is stored, may be
+// reserved for the frame pointer.
+// Save and restore r7 around the syscall without touching the stack pointer,
+// so as not to break the frame chain.
+
+pub fn syscall0(number: SYS) usize {
+    @setRuntimeSafety(false);
+
+    var buf: [2]usize = .{ @enumToInt(number), undefined };
+    return asm volatile (
+        \\ str r7, [%[tmp], #4]
+        \\ ldr r7, [%[tmp]]
+        \\ svc #0
+        \\ ldr r7, [%[tmp], #4]
+        : [ret] "={r0}" (-> usize)
+        : [tmp] "{r1}" (buf)
+        : "memory"
+    );
+}
+
+pub fn syscall1(number: SYS, arg1: usize) usize {
+    @setRuntimeSafety(false);
+
+    var buf: [2]usize = .{ @enumToInt(number), undefined };
+    return asm volatile (
+        \\ str r7, [%[tmp], #4]
+        \\ ldr r7, [%[tmp]]
+        \\ svc #0
+        \\ ldr r7, [%[tmp], #4]
+        : [ret] "={r0}" (-> usize)
+        : [tmp] "{r1}" (buf),
+          [arg1] "{r0}" (arg1)
+        : "memory"
+    );
+}
+
+pub fn syscall2(number: SYS, arg1: usize, arg2: usize) usize {
+    @setRuntimeSafety(false);
+
+    var buf: [2]usize = .{ @enumToInt(number), undefined };
+    return asm volatile (
+        \\ str r7, [%[tmp], #4]
+        \\ ldr r7, [%[tmp]]
+        \\ svc #0
+        \\ ldr r7, [%[tmp], #4]
+        : [ret] "={r0}" (-> usize)
+        : [tmp] "{r2}" (buf),
+          [arg1] "{r0}" (arg1),
+          [arg2] "{r1}" (arg2)
+        : "memory"
+    );
+}
+
+pub fn syscall3(number: SYS, arg1: usize, arg2: usize, arg3: usize) usize {
+    @setRuntimeSafety(false);
+
+    var buf: [2]usize = .{ @enumToInt(number), undefined };
+    return asm volatile (
+        \\ str r7, [%[tmp], #4]
+        \\ ldr r7, [%[tmp]]
+        \\ svc #0
+        \\ ldr r7, [%[tmp], #4]
+        : [ret] "={r0}" (-> usize)
+        : [tmp] "{r3}" (buf),
+          [arg1] "{r0}" (arg1),
+          [arg2] "{r1}" (arg2),
+          [arg3] "{r2}" (arg3)
+        : "memory"
+    );
+}
+
+pub fn syscall4(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize) usize {
+    @setRuntimeSafety(false);
+
+    var buf: [2]usize = .{ @enumToInt(number), undefined };
+    return asm volatile (
+        \\ str r7, [%[tmp], #4]
+        \\ ldr r7, [%[tmp]]
+        \\ svc #0
+        \\ ldr r7, [%[tmp], #4]
+        : [ret] "={r0}" (-> usize)
+        : [tmp] "{r4}" (buf),
+          [arg1] "{r0}" (arg1),
+          [arg2] "{r1}" (arg2),
+          [arg3] "{r2}" (arg3),
+          [arg4] "{r3}" (arg4)
+        : "memory"
+    );
+}
+
+pub fn syscall5(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize {
+    @setRuntimeSafety(false);
+
+    var buf: [2]usize = .{ @enumToInt(number), undefined };
+    return asm volatile (
+        \\ str r7, [%[tmp], #4]
+        \\ ldr r7, [%[tmp]]
+        \\ svc #0
+        \\ ldr r7, [%[tmp], #4]
+        : [ret] "={r0}" (-> usize)
+        : [tmp] "{r5}" (buf),
+          [arg1] "{r0}" (arg1),
+          [arg2] "{r1}" (arg2),
+          [arg3] "{r2}" (arg3),
+          [arg4] "{r3}" (arg4),
+          [arg5] "{r4}" (arg5)
+        : "memory"
+    );
+}
+
+pub fn syscall6(
+    number: SYS,
+    arg1: usize,
+    arg2: usize,
+    arg3: usize,
+    arg4: usize,
+    arg5: usize,
+    arg6: usize,
+) usize {
+    @setRuntimeSafety(false);
+
+    var buf: [2]usize = .{ @enumToInt(number), undefined };
+    return asm volatile (
+        \\ str r7, [%[tmp], #4]
+        \\ ldr r7, [%[tmp]]
+        \\ svc #0
+        \\ ldr r7, [%[tmp], #4]
+        : [ret] "={r0}" (-> usize)
+        : [tmp] "{r6}" (buf),
+          [arg1] "{r0}" (arg1),
+          [arg2] "{r1}" (arg2),
+          [arg3] "{r2}" (arg3),
+          [arg4] "{r3}" (arg4),
+          [arg5] "{r4}" (arg5),
+          [arg6] "{r5}" (arg6)
+        : "memory"
+    );
+}
+
+/// This matches the libc clone function.
+pub extern fn clone(func: fn (arg: usize) callconv(.C) u8, stack: usize, flags: u32, arg: usize, ptid: *i32, tls: usize, ctid: *i32) usize;
+
+pub fn restore() callconv(.Naked) void {
+    return asm volatile (
+        \\ mov r7, %[number]
+        \\ svc #0
+        :
+        : [number] "I" (@enumToInt(SYS.sigreturn))
+    );
+}
+
+pub fn restore_rt() callconv(.Naked) void {
+    return asm volatile (
+        \\ mov r7, %[number]
+        \\ svc #0
+        :
+        : [number] "I" (@enumToInt(SYS.rt_sigreturn))
+        : "memory"
+    );
+}
diff --git a/lib/std/os/linux/tls.zig b/lib/std/os/linux/tls.zig
index 4a36b0d485..0830dcbfda 100644
--- a/lib/std/os/linux/tls.zig
+++ b/lib/std/os/linux/tls.zig
@@ -53,7 +53,7 @@ const TLSVariant = enum {
 };
 
 const tls_variant = switch (builtin.arch) {
-    .arm, .armeb, .aarch64, .aarch64_be, .riscv32, .riscv64, .mips, .mipsel, .powerpc, .powerpc64, .powerpc64le => TLSVariant.VariantI,
+    .arm, .armeb, .thumb, .aarch64, .aarch64_be, .riscv32, .riscv64, .mips, .mipsel, .powerpc, .powerpc64, .powerpc64le => TLSVariant.VariantI,
     .x86_64, .i386, .sparcv9 => TLSVariant.VariantII,
     else => @compileError("undefined tls_variant for this architecture"),
 };
@@ -62,7 +62,7 @@ const tls_variant = switch (builtin.arch) {
 const tls_tcb_size = switch (builtin.arch) {
     // ARM EABI mandates enough space for two pointers: the first one points to
     // the DTV while the second one is unspecified but reserved
-    .arm, .armeb, .aarch64, .aarch64_be => 2 * @sizeOf(usize),
+    .arm, .armeb, .thumb, .aarch64, .aarch64_be => 2 * @sizeOf(usize),
     // One pointer-sized word that points either to the DTV or the TCB itself
     else => @sizeOf(usize),
 };
@@ -150,7 +150,7 @@ pub fn setThreadPointer(addr: usize) void {
                 : [addr] "r" (addr)
             );
         },
-        .arm => {
+        .arm, .thumb => {
             const rc = std.os.linux.syscall1(.set_tls, addr);
             assert(rc == 0);
         },
diff --git a/lib/std/special/c.zig b/lib/std/special/c.zig
index c7084f3a11..29feae830f 100644
--- a/lib/std/special/c.zig
+++ b/lib/std/special/c.zig
@@ -385,7 +385,7 @@ fn clone() callconv(.Naked) void {
                 \\      svc #0
             );
         },
-        .arm => {
+        .arm, .thumb => {
             // __clone(func, stack, flags, arg, ptid, tls, ctid)
             //           r0,    r1,    r2,  r3,   +0,  +4,   +8
 
diff --git a/lib/std/start.zig b/lib/std/start.zig
index 89f5eb0b1f..e1e331a682 100644
--- a/lib/std/start.zig
+++ b/lib/std/start.zig
@@ -176,7 +176,7 @@ fn _start() callconv(.Naked) noreturn {
                 : [argc] "={esp}" (-> [*]usize)
             );
         },
-        .aarch64, .aarch64_be, .arm, .armeb => {
+        .aarch64, .aarch64_be, .arm, .armeb, .thumb => {
             argc_argv_ptr = asm volatile (
                 \\ mov fp, #0
                 \\ mov lr, #0
diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig
index 42099c6efe..d9657d9db4 100644
--- a/lib/std/zig/system.zig
+++ b/lib/std/zig/system.zig
@@ -349,6 +349,15 @@ pub const NativeTargetInfo = struct {
                     }
                 }
             },
+            .arm, .armeb => {
+                // XXX What do we do if the target has the noarm feature?
+                //     What do we do if the user specifies +thumb_mode?
+            },
+            .thumb, .thumbeb => {
+                result.target.cpu.features.addFeature(
+                    @enumToInt(std.Target.arm.Feature.thumb_mode),
+                );
+            },
             else => {},
         }
         cross_target.updateCpuFeatures(&result.target.cpu.features);
diff --git a/test/stage1/behavior/align.zig b/test/stage1/behavior/align.zig
index 0a0cc3bcc0..38f5df0176 100644
--- a/test/stage1/behavior/align.zig
+++ b/test/stage1/behavior/align.zig
@@ -141,6 +141,7 @@ fn alignedBig() align(16) i32 {
 test "@alignCast functions" {
     // function alignment is a compile error on wasm32/wasm64
     if (builtin.arch == .wasm32 or builtin.arch == .wasm64) return error.SkipZigTest;
+    if (builtin.arch == .thumb) return error.SkipZigTest;
 
     expect(fnExpectsOnly1(simple4) == 0x19);
 }
@@ -157,6 +158,7 @@ fn simple4() align(4) i32 {
 test "generic function with align param" {
     // function alignment is a compile error on wasm32/wasm64
     if (builtin.arch == .wasm32 or builtin.arch == .wasm64) return error.SkipZigTest;
+    if (builtin.arch == .thumb) return error.SkipZigTest;
 
     expect(whyWouldYouEverDoThis(1) == 0x1);
     expect(whyWouldYouEverDoThis(4) == 0x1);
@@ -338,6 +340,7 @@ test "align(@alignOf(T)) T does not force resolution of T" {
 test "align(N) on functions" {
     // function alignment is a compile error on wasm32/wasm64
     if (builtin.arch == .wasm32 or builtin.arch == .wasm64) return error.SkipZigTest;
+    if (builtin.arch == .thumb) return error.SkipZigTest;
 
     expect((@ptrToInt(overaligned_fn) & (0x1000 - 1)) == 0);
 }
diff --git a/test/stage1/behavior/async_fn.zig b/test/stage1/behavior/async_fn.zig
index 0765eac7e8..09db0eeb29 100644
--- a/test/stage1/behavior/async_fn.zig
+++ b/test/stage1/behavior/async_fn.zig
@@ -110,6 +110,9 @@ test "calling an inferred async function" {
 }
 
 test "@frameSize" {
+    if (builtin.arch == .thumb or builtin.arch == .thumbeb)
+        return error.SkipZigTest;
+
     const S = struct {
         fn doTheTest() void {
             {
diff --git a/test/stage1/behavior/atomics.zig b/test/stage1/behavior/atomics.zig
index f9703e7308..d49ca730e6 100644
--- a/test/stage1/behavior/atomics.zig
+++ b/test/stage1/behavior/atomics.zig
@@ -149,9 +149,10 @@ fn testAtomicStore() void {
 }
 
 test "atomicrmw with floats" {
-    if (builtin.arch == .aarch64 or builtin.arch == .arm or builtin.arch == .riscv64) {
+    switch (builtin.arch) {
         // https://github.com/ziglang/zig/issues/4457
-        return error.SkipZigTest;
+        .aarch64, .arm, .thumb, .riscv64 => return error.SkipZigTest,
+        else => {},
     }
     testAtomicRmwFloat();
     comptime testAtomicRmwFloat();