From c641fb8f05cfbca7484a09496f33bc9c2d95941e Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 15:40:31 -0700 Subject: [PATCH 01/22] stage2: fix {add,sub,mul}_with_overflow vectorization in LLVM backend --- src/codegen/llvm.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 5a8516b363..3e15bd8d9a 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5604,8 +5604,9 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(extra.rhs); const lhs_ty = self.air.typeOf(extra.lhs); + const scalar_ty = lhs_ty.scalarType(); - const intrinsic_name = if (lhs_ty.isSignedInt()) signed_intrinsic else unsigned_intrinsic; + const intrinsic_name = if (scalar_ty.isSignedInt()) signed_intrinsic else unsigned_intrinsic; const llvm_lhs_ty = try self.dg.llvmType(lhs_ty); From c2cb9b7cade597bc967620174b31db3895681f96 Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 15:41:44 -0700 Subject: [PATCH 02/22] stage2: vectorize shl_with_overflow in LLVM backend --- src/codegen/llvm.zig | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 3e15bd8d9a..3abdccfbe2 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5899,26 +5899,30 @@ pub const FuncGen = struct { const lhs_ty = self.air.typeOf(extra.lhs); const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_scalar_ty = lhs_ty.scalarType(); + const rhs_scalar_ty = rhs_ty.scalarType(); + const dest_ty = self.air.typeOfIndex(inst); const llvm_dest_ty = try self.dg.llvmType(dest_ty); const tg = self.dg.module.getTarget(); - const casted_rhs = if (rhs_ty.bitSize(tg) < lhs_ty.bitSize(tg)) + const casted_rhs = if (rhs_scalar_ty.bitSize(tg) < lhs_scalar_ty.bitSize(tg)) self.builder.buildZExt(rhs, try self.dg.llvmType(lhs_ty), "") else rhs; const result = self.builder.buildShl(lhs, casted_rhs, ""); - const reconstructed = if 
(lhs_ty.isSignedInt()) + const reconstructed = if (lhs_scalar_ty.isSignedInt()) self.builder.buildAShr(result, casted_rhs, "") else self.builder.buildLShr(result, casted_rhs, ""); const overflow_bit = self.builder.buildICmp(.NE, lhs, reconstructed, ""); - const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, 0, ""); - return self.builder.buildInsertValue(partial, overflow_bit, 1, ""); + var ty_buf: Type.Payload.Pointer = undefined; + const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, llvmFieldIndex(dest_ty, 0, tg, &ty_buf).?, ""); + return self.builder.buildInsertValue(partial, overflow_bit, llvmFieldIndex(dest_ty, 1, tg, &ty_buf).?, ""); } fn airAnd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { From ca1ab38d3a037239fc1399c2f9d5b2967acb6757 Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 15:43:47 -0700 Subject: [PATCH 03/22] stage2: add global `Type` constant for `u1` --- src/type.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/type.zig b/src/type.zig index 2c3ce0d900..54b7d44a3d 100644 --- a/src/type.zig +++ b/src/type.zig @@ -5999,6 +5999,7 @@ pub const Type = extern union { }; }; + pub const @"u1" = initTag(.u1); pub const @"u8" = initTag(.u8); pub const @"u16" = initTag(.u16); pub const @"u32" = initTag(.u32); From 6b5c87957b37df89d2ea53f1b39a7e4be0bf1326 Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 15:45:30 -0700 Subject: [PATCH 04/22] stage2: handle vectors in `Value.intFitsInType` --- src/value.zig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/value.zig b/src/value.zig index 1e70ad0c54..2ebabe3a27 100644 --- a/src/value.zig +++ b/src/value.zig @@ -1671,6 +1671,7 @@ pub const Value = extern union { } /// Asserts the value is an integer, and the destination type is ComptimeInt or Int. + /// Vectors are also accepted. Vector results are reduced with AND. 
pub fn intFitsInType(self: Value, ty: Type, target: Target) bool { switch (self.tag()) { .zero, @@ -1767,6 +1768,16 @@ pub const Value = extern union { else => unreachable, }, + .aggregate => { + assert(ty.zigTypeTag() == .Vector); + for (self.castTag(.aggregate).?.data) |elem| { + if (!elem.intFitsInType(ty.scalarType(), target)) { + return false; + } + } + return true; + }, + else => unreachable, } } From e8117bab6f786348142a72b5279380a937c3a151 Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 15:47:35 -0700 Subject: [PATCH 05/22] stage2: clean up creation of boolean `Value`s --- src/value.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/value.zig b/src/value.zig index 2ebabe3a27..ffc15dfa19 100644 --- a/src/value.zig +++ b/src/value.zig @@ -2026,7 +2026,7 @@ pub const Value = extern union { const result_data = try allocator.alloc(Value, ty.vectorLen()); for (result_data) |*scalar, i| { const res_bool = compareScalar(lhs.indexVectorlike(i), op, rhs.indexVectorlike(i), ty.scalarType(), mod); - scalar.* = if (res_bool) Value.@"true" else Value.@"false"; + scalar.* = makeBool(res_bool); } return Value.Tag.aggregate.create(allocator, result_data); } From 86a928ce61ae7df52d3d54fa5c653195a7a4cbef Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 15:50:55 -0700 Subject: [PATCH 06/22] stage2: perform comptime vectorization of `*_with_overflow` in `Value` --- src/value.zig | 103 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 5 deletions(-) diff --git a/src/value.zig b/src/value.zig index ffc15dfa19..b7764327c0 100644 --- a/src/value.zig +++ b/src/value.zig @@ -2961,7 +2961,8 @@ pub const Value = extern union { } pub const OverflowArithmeticResult = struct { - overflowed: bool, + /// TODO: Rename to `overflow_bit` and make of type `u1`. 
+ overflowed: Value, wrapped_result: Value, }; @@ -2971,6 +2972,29 @@ pub const Value = extern union { ty: Type, arena: Allocator, target: Target, + ) !OverflowArithmeticResult { + if (ty.zigTypeTag() == .Vector) { + const overflowed_data = try arena.alloc(Value, ty.vectorLen()); + const result_data = try arena.alloc(Value, ty.vectorLen()); + for (result_data) |*scalar, i| { + const of_math_result = try intAddWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target); + overflowed_data[i] = of_math_result.overflowed; + scalar.* = of_math_result.wrapped_result; + } + return OverflowArithmeticResult{ + .overflowed = try Value.Tag.aggregate.create(arena, overflowed_data), + .wrapped_result = try Value.Tag.aggregate.create(arena, result_data), + }; + } + return intAddWithOverflowScalar(lhs, rhs, ty, arena, target); + } + + pub fn intAddWithOverflowScalar( + lhs: Value, + rhs: Value, + ty: Type, + arena: Allocator, + target: Target, ) !OverflowArithmeticResult { const info = ty.intInfo(target); @@ -2986,7 +3010,7 @@ pub const Value = extern union { const overflowed = result_bigint.addWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits); const result = try fromBigInt(arena, result_bigint.toConst()); return OverflowArithmeticResult{ - .overflowed = overflowed, + .overflowed = makeBool(overflowed), .wrapped_result = result, }; } @@ -3097,6 +3121,29 @@ pub const Value = extern union { ty: Type, arena: Allocator, target: Target, + ) !OverflowArithmeticResult { + if (ty.zigTypeTag() == .Vector) { + const overflowed_data = try arena.alloc(Value, ty.vectorLen()); + const result_data = try arena.alloc(Value, ty.vectorLen()); + for (result_data) |*scalar, i| { + const of_math_result = try intSubWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target); + overflowed_data[i] = of_math_result.overflowed; + scalar.* = of_math_result.wrapped_result; + } + return OverflowArithmeticResult{ + .overflowed 
= try Value.Tag.aggregate.create(arena, overflowed_data), + .wrapped_result = try Value.Tag.aggregate.create(arena, result_data), + }; + } + return intSubWithOverflowScalar(lhs, rhs, ty, arena, target); + } + + pub fn intSubWithOverflowScalar( + lhs: Value, + rhs: Value, + ty: Type, + arena: Allocator, + target: Target, ) !OverflowArithmeticResult { const info = ty.intInfo(target); @@ -3112,7 +3159,7 @@ pub const Value = extern union { const overflowed = result_bigint.subWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits); const wrapped_result = try fromBigInt(arena, result_bigint.toConst()); return OverflowArithmeticResult{ - .overflowed = overflowed, + .overflowed = makeBool(overflowed), .wrapped_result = wrapped_result, }; } @@ -3207,6 +3254,29 @@ pub const Value = extern union { ty: Type, arena: Allocator, target: Target, + ) !OverflowArithmeticResult { + if (ty.zigTypeTag() == .Vector) { + const overflowed_data = try arena.alloc(Value, ty.vectorLen()); + const result_data = try arena.alloc(Value, ty.vectorLen()); + for (result_data) |*scalar, i| { + const of_math_result = try intMulWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target); + overflowed_data[i] = of_math_result.overflowed; + scalar.* = of_math_result.wrapped_result; + } + return OverflowArithmeticResult{ + .overflowed = try Value.Tag.aggregate.create(arena, overflowed_data), + .wrapped_result = try Value.Tag.aggregate.create(arena, result_data), + }; + } + return intMulWithOverflowScalar(lhs, rhs, ty, arena, target); + } + + pub fn intMulWithOverflowScalar( + lhs: Value, + rhs: Value, + ty: Type, + arena: Allocator, + target: Target, ) !OverflowArithmeticResult { const info = ty.intInfo(target); @@ -3231,7 +3301,7 @@ pub const Value = extern union { } return OverflowArithmeticResult{ - .overflowed = overflowed, + .overflowed = makeBool(overflowed), .wrapped_result = try fromBigInt(arena, result_bigint.toConst()), }; } @@ -3921,6 +3991,29 @@ pub 
const Value = extern union { ty: Type, allocator: Allocator, target: Target, + ) !OverflowArithmeticResult { + if (ty.zigTypeTag() == .Vector) { + const overflowed_data = try allocator.alloc(Value, ty.vectorLen()); + const result_data = try allocator.alloc(Value, ty.vectorLen()); + for (result_data) |*scalar, i| { + const of_math_result = try shlWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), allocator, target); + overflowed_data[i] = of_math_result.overflowed; + scalar.* = of_math_result.wrapped_result; + } + return OverflowArithmeticResult{ + .overflowed = try Value.Tag.aggregate.create(allocator, overflowed_data), + .wrapped_result = try Value.Tag.aggregate.create(allocator, result_data), + }; + } + return shlWithOverflowScalar(lhs, rhs, ty, allocator, target); + } + + pub fn shlWithOverflowScalar( + lhs: Value, + rhs: Value, + ty: Type, + allocator: Allocator, + target: Target, ) !OverflowArithmeticResult { const info = ty.intInfo(target); var lhs_space: Value.BigIntSpace = undefined; @@ -3941,7 +4034,7 @@ pub const Value = extern union { result_bigint.truncate(result_bigint.toConst(), info.signedness, info.bits); } return OverflowArithmeticResult{ - .overflowed = overflowed, + .overflowed = makeBool(overflowed), .wrapped_result = try fromBigInt(allocator, result_bigint.toConst()), }; } From eb06c78a8ac472b3406075dcbddf7fce63e98597 Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 15:55:33 -0700 Subject: [PATCH 07/22] Sema: vectorize overflow arithmetic --- src/Sema.zig | 118 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 45 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index 2cc9b82410..5c37b1a0f4 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -9417,32 +9417,29 @@ fn zirOverflowArithmetic( const ptr = sema.resolveInst(extra.ptr); const lhs_ty = sema.typeOf(lhs); + const rhs_ty = sema.typeOf(rhs); const mod = sema.mod; const target = mod.getTarget(); // Note, 
the types of lhs/rhs (also for shifting)/ptr are already correct as ensured by astgen. + try sema.checkVectorizableBinaryOperands(block, src, lhs_ty, rhs_ty, lhs_src, rhs_src); const dest_ty = lhs_ty; - if (dest_ty.zigTypeTag() != .Int) { - return sema.fail(block, src, "expected integer type, found '{}'", .{dest_ty.fmt(mod)}); + if (dest_ty.scalarType().zigTypeTag() != .Int) { + return sema.fail(block, src, "expected vector of integers or integer type, found '{}'", .{dest_ty.fmt(mod)}); } const maybe_lhs_val = try sema.resolveMaybeUndefVal(block, lhs_src, lhs); const maybe_rhs_val = try sema.resolveMaybeUndefVal(block, rhs_src, rhs); - const types = try sema.arena.alloc(Type, 2); - const values = try sema.arena.alloc(Value, 2); - const tuple_ty = try Type.Tag.tuple.create(sema.arena, .{ - .types = types, - .values = values, - }); - - types[0] = dest_ty; - types[1] = Type.initTag(.u1); - values[0] = Value.initTag(.unreachable_value); - values[1] = Value.initTag(.unreachable_value); + const tuple_ty = try sema.overflowArithmeticTupleType(dest_ty); + const ov_ty = tuple_ty.tupleFields().types[1]; + // TODO: Remove and use `ov_ty` instead. + // This is a temporary type used until overflow arithmetic properly returns `u1` instead of `bool`. + const overflowed_ty = if (dest_ty.zigTypeTag() == .Vector) try Type.vector(sema.arena, dest_ty.vectorLen(), Type.@"bool") else Type.@"bool"; const result: struct { - overflowed: enum { yes, no, undef }, + /// TODO: Rename to `overflow_bit` and make of type `u1`. + overflowed: Air.Inst.Ref, wrapped: Air.Inst.Ref, } = result: { switch (zir_tag) { @@ -9452,23 +9449,24 @@ fn zirOverflowArithmetic( // Otherwise, if either of the argument is undefined, undefined is returned. 
if (maybe_lhs_val) |lhs_val| { if (!lhs_val.isUndef() and lhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = rhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs }; } } if (maybe_rhs_val) |rhs_val| { if (!rhs_val.isUndef() and rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } if (maybe_lhs_val) |lhs_val| { if (maybe_rhs_val) |rhs_val| { if (lhs_val.isUndef() or rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.intAddWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9477,17 +9475,18 @@ fn zirOverflowArithmetic( // Otherwise, if either result is undefined, both results are undefined. 
if (maybe_rhs_val) |rhs_val| { if (rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } else if (rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } else if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.intSubWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9498,9 +9497,9 @@ fn zirOverflowArithmetic( if (maybe_lhs_val) |lhs_val| { if (!lhs_val.isUndef()) { if (lhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } else if (lhs_val.compare(.eq, Value.one, dest_ty, mod)) { - break :result .{ .overflowed = .no, .wrapped = rhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs }; } } } @@ -9508,9 +9507,9 @@ fn zirOverflowArithmetic( if (maybe_rhs_val) |rhs_val| { if (!rhs_val.isUndef()) { if (rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = rhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs }; } else if 
(rhs_val.compare(.eq, Value.one, dest_ty, mod)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } } @@ -9518,12 +9517,13 @@ fn zirOverflowArithmetic( if (maybe_lhs_val) |lhs_val| { if (maybe_rhs_val) |rhs_val| { if (lhs_val.isUndef() or rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.intMulWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9533,23 +9533,24 @@ fn zirOverflowArithmetic( // Oterhwise if either of the arguments is undefined, both results are undefined. 
if (maybe_lhs_val) |lhs_val| { if (!lhs_val.isUndef() and lhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } if (maybe_rhs_val) |rhs_val| { if (!rhs_val.isUndef() and rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } if (maybe_lhs_val) |lhs_val| { if (maybe_rhs_val) |rhs_val| { if (lhs_val.isUndef() or rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.shlWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9577,21 +9578,40 @@ fn zirOverflowArithmetic( } }, }); - const wrapped = try block.addStructFieldVal(tuple, 0, dest_ty); + const wrapped = try sema.tupleFieldValByIndex(block, src, tuple, 0, tuple_ty); try sema.storePtr2(block, src, ptr, ptr_src, wrapped, src, .store); - const overflow_bit = try block.addStructFieldVal(tuple, 1, Type.initTag(.u1)); - const zero_u1 = try sema.addConstant(Type.initTag(.u1), Value.zero); - return try block.addBinOp(.cmp_neq, overflow_bit, zero_u1); + const overflow_bit = try sema.tupleFieldValByIndex(block, src, tuple, 1, tuple_ty); + const zero_ov_val = if (dest_ty.zigTypeTag() == .Vector) try Value.Tag.repeated.create(sema.arena, Value.zero) else Value.zero; + const zero_ov = try 
sema.addConstant(ov_ty, zero_ov_val); + + const overflowed_inst = if (dest_ty.zigTypeTag() == .Vector) + block.addCmpVector(overflow_bit, .zero, .neq, try sema.addType(ov_ty)) + else + block.addBinOp(.cmp_neq, overflow_bit, zero_ov); + return overflowed_inst; }; try sema.storePtr2(block, src, ptr, ptr_src, result.wrapped, src, .store); + return result.overflowed; +} - return switch (result.overflowed) { - .yes => Air.Inst.Ref.bool_true, - .no => Air.Inst.Ref.bool_false, - .undef => try sema.addConstUndef(Type.bool), - }; +fn overflowArithmeticTupleType(sema: *Sema, ty: Type) !Type { + const ov_ty = if (ty.zigTypeTag() == .Vector) try Type.vector(sema.arena, ty.vectorLen(), Type.@"u1") else Type.@"u1"; + + const types = try sema.arena.alloc(Type, 2); + const values = try sema.arena.alloc(Value, 2); + const tuple_ty = try Type.Tag.tuple.create(sema.arena, .{ + .types = types, + .values = values, + }); + + types[0] = ty; + types[1] = ov_ty; + values[0] = Value.initTag(.unreachable_value); + values[1] = Value.initTag(.unreachable_value); + + return tuple_ty; } fn analyzeArithmetic( @@ -23093,6 +23113,14 @@ fn addIntUnsigned(sema: *Sema, ty: Type, int: u64) CompileError!Air.Inst.Ref { return sema.addConstant(ty, try Value.Tag.int_u64.create(sema.arena, int)); } +fn addBool(sema: *Sema, ty: Type, boolean: bool) CompileError!Air.Inst.Ref { + return switch (ty.zigTypeTag()) { + .Vector => sema.addConstant(ty, try Value.Tag.repeated.create(sema.arena, Value.makeBool(boolean))), + .Bool => sema.resolveInst(if (boolean) .bool_true else .bool_false), + else => unreachable, + }; +} + fn addConstUndef(sema: *Sema, ty: Type) CompileError!Air.Inst.Ref { return sema.addConstant(ty, Value.undef); } From c2980f332ed46e6ad7e8ac81b4dbef6d363447fb Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 16:04:17 -0700 Subject: [PATCH 08/22] Sema: implement integer overflow safety for add, sub, mul --- src/Sema.zig | 74 +++++++++++++++++++++++++++++++++++++++++++--------- 1 
file changed, 62 insertions(+), 12 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index 5c37b1a0f4..a37fc2ef50 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1574,6 +1574,12 @@ fn failWithErrorSetCodeMissing( }); } +fn failWithIntegerOverflow(sema: *Sema, block: *Block, src: LazySrcLoc, int_ty: Type, val: Value) CompileError { + return sema.fail(block, src, "overflow of integer type '{}' with value '{}'", .{ + int_ty.fmt(sema.mod), val.fmtValue(Type.@"comptime_int", sema.mod), + }); +} + /// We don't return a pointer to the new error note because the pointer /// becomes invalid when you add another one. fn errNote( @@ -9711,10 +9717,11 @@ fn analyzeArithmetic( } if (maybe_rhs_val) |rhs_val| { if (is_int) { - return sema.addConstant( - resolved_type, - try lhs_val.intAdd(rhs_val, resolved_type, sema.arena, target), - ); + const sum = try lhs_val.intAdd(rhs_val, resolved_type, sema.arena, target); + if (!sum.intFitsInType(resolved_type, target)) { + return sema.failWithIntegerOverflow(block, src, resolved_type, sum); + } + return sema.addConstant(resolved_type, sum); } else { return sema.addConstant( resolved_type, @@ -9804,10 +9811,11 @@ fn analyzeArithmetic( } if (maybe_rhs_val) |rhs_val| { if (is_int) { - return sema.addConstant( - resolved_type, - try lhs_val.intSub(rhs_val, resolved_type, sema.arena, target), - ); + const diff = try lhs_val.intSub(rhs_val, resolved_type, sema.arena, target); + if (!diff.intFitsInType(resolved_type, target)) { + return sema.failWithIntegerOverflow(block, src, resolved_type, diff); + } + return sema.addConstant(resolved_type, diff); } else { return sema.addConstant( resolved_type, @@ -10177,10 +10185,11 @@ fn analyzeArithmetic( } } if (is_int) { - return sema.addConstant( - resolved_type, - try lhs_val.intMul(rhs_val, resolved_type, sema.arena, target), - ); + const product = try lhs_val.intMul(rhs_val, resolved_type, sema.arena, target); + if (!product.intFitsInType(resolved_type, target)) { + return 
sema.failWithIntegerOverflow(block, src, resolved_type, product); + } + return sema.addConstant(resolved_type, product); } else { return sema.addConstant( resolved_type, @@ -10468,6 +10477,45 @@ fn analyzeArithmetic( }; try sema.requireRuntimeBlock(block, rs.src); + if (block.wantSafety()) { + if (scalar_tag == .Int) { + const maybe_op_ov: ?Air.Inst.Tag = switch (rs.air_tag) { + .add => .add_with_overflow, + .sub => .sub_with_overflow, + .mul => .mul_with_overflow, + else => null, + }; + if (maybe_op_ov) |op_ov_tag| { + const op_ov_tuple_ty = try sema.overflowArithmeticTupleType(resolved_type); + const op_ov = try block.addInst(.{ + .tag = op_ov_tag, + .data = .{ .ty_pl = .{ + .ty = try sema.addType(op_ov_tuple_ty), + .payload = try sema.addExtra(Air.Bin{ + .lhs = casted_lhs, + .rhs = casted_rhs, + }), + } }, + }); + const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); + const any_ov_bit = if (resolved_type.zigTypeTag() == .Vector) + try block.addInst(.{ + .tag = .reduce, + .data = .{ .reduce = .{ + .operand = ov_bit, + .operation = .Or, + } }, + }) + else + ov_bit; + const zero_ov = try sema.addConstant(Type.@"u1", Value.zero); + const no_ov = try block.addBinOp(.cmp_eq, any_ov_bit, zero_ov); + + try sema.addSafetyCheck(block, no_ov, .integer_overflow); + return sema.tupleFieldValByIndex(block, src, op_ov, 0, op_ov_tuple_ty); + } + } + } return block.addBinOp(rs.air_tag, casted_lhs, casted_rhs); } @@ -16702,6 +16750,7 @@ pub const PanicId = enum { invalid_error_code, index_out_of_bounds, cast_truncated_data, + integer_overflow, }; fn addSafetyCheck( @@ -16825,6 +16874,7 @@ fn safetyPanic( .invalid_error_code => "invalid error code", .index_out_of_bounds => "attempt to index out of bounds", .cast_truncated_data => "integer cast truncated bits", + .integer_overflow => "integer overflow", }; const msg_inst = msg_inst: { From afc714d5e5ff4d10a2b5dcc8f4c2eac8245de35c Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 
16:06:59 -0700 Subject: [PATCH 09/22] stage2: implement runtime safety checks for shl_exact --- src/Sema.zig | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index a37fc2ef50..471639ba96 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -8826,8 +8826,6 @@ fn zirShl( return sema.addConstant(lhs_ty, val); } else lhs_src; - // TODO: insert runtime safety check for shl_exact - const new_rhs = if (air_tag == .shl_sat) rhs: { // Limit the RHS type for saturating shl to be an integer as small as the LHS. if (rhs_is_comptime_int or @@ -8845,6 +8843,41 @@ fn zirShl( } else rhs; try sema.requireRuntimeBlock(block, runtime_src); + if (block.wantSafety()) { + const maybe_op_ov: ?Air.Inst.Tag = switch (air_tag) { + .shl_exact => .shl_with_overflow, + else => null, + }; + if (maybe_op_ov) |op_ov_tag| { + const op_ov_tuple_ty = try sema.overflowArithmeticTupleType(lhs_ty); + const op_ov = try block.addInst(.{ + .tag = op_ov_tag, + .data = .{ .ty_pl = .{ + .ty = try sema.addType(op_ov_tuple_ty), + .payload = try sema.addExtra(Air.Bin{ + .lhs = lhs, + .rhs = rhs, + }), + } }, + }); + const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); + const any_ov_bit = if (lhs_ty.zigTypeTag() == .Vector) + try block.addInst(.{ + .tag = .reduce, + .data = .{ .reduce = .{ + .operand = ov_bit, + .operation = .Or, + } }, + }) + else + ov_bit; + const zero_ov = try sema.addConstant(Type.@"u1", Value.zero); + const no_ov = try block.addBinOp(.cmp_eq, any_ov_bit, zero_ov); + + try sema.addSafetyCheck(block, no_ov, .shl_overflow); + return sema.tupleFieldValByIndex(block, src, op_ov, 0, op_ov_tuple_ty); + } + } return block.addBinOp(air_tag, lhs, new_rhs); } @@ -16751,6 +16784,7 @@ pub const PanicId = enum { index_out_of_bounds, cast_truncated_data, integer_overflow, + shl_overflow, }; fn addSafetyCheck( @@ -16875,6 +16909,7 @@ fn safetyPanic( .index_out_of_bounds => "attempt to index out 
of bounds", .cast_truncated_data => "integer cast truncated bits", .integer_overflow => "integer overflow", + .shl_overflow => "left shift overflowed bits", }; const msg_inst = msg_inst: { From 21be3d9166e8ea159ef334243b945bd20ed62652 Mon Sep 17 00:00:00 2001 From: William Sengir Date: Sat, 26 Mar 2022 16:07:52 -0700 Subject: [PATCH 10/22] stage2: add vectorized overflow arithmetic behavior tests --- test/behavior/vector.zig | 92 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 9847054692..e8150dde7a 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -903,3 +903,95 @@ test "multiplication-assignment operator with an array operand" { try S.doTheTest(); comptime try S.doTheTest(); } + +test "@addWithOverflow" { + if (builtin.zig_backend == .stage1) { + // stage1 doesn't support vector args + return error.SkipZigTest; + } + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + + const S = struct { + fn doTheTest() !void { + var result: @Vector(4, u8) = undefined; + var overflow = @addWithOverflow(@Vector(4, u8), @Vector(4, u8){ 250, 250, 250, 250 }, @Vector(4, u8){ 0, 5, 6, 10 }, &result); + var expected: @Vector(4, bool) = .{ false, false, true, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + }; + try S.doTheTest(); + comptime try S.doTheTest(); +} + +test "@subWithOverflow" { + if (builtin.zig_backend == .stage1) { + // stage1 doesn't support vector args + return error.SkipZigTest; + } + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if 
(builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + + const S = struct { + fn doTheTest() !void { + var result: @Vector(4, i8) = undefined; + var overflow = @subWithOverflow(@Vector(4, i8), @Vector(4, i8){ -120, -120, 120, 120 }, @Vector(4, i8){ 8, 9, -7, -8 }, &result); + var expected: @Vector(4, bool) = .{ false, true, false, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + }; + try S.doTheTest(); + comptime try S.doTheTest(); +} + +test "@mulWithOverflow" { + if (builtin.zig_backend == .stage1) { + // stage1 doesn't support vector args + return error.SkipZigTest; + } + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + + const S = struct { + fn doTheTest() !void { + var result: @Vector(4, u8) = undefined; + var overflow = @mulWithOverflow(@Vector(4, u8), @Vector(4, u8){ 10, 10, 10, 10 }, @Vector(4, u8){ 25, 26, 0, 30 }, &result); + var expected: @Vector(4, bool) = .{ false, true, false, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + }; + try S.doTheTest(); + comptime try S.doTheTest(); +} + +test "@shlWithOverflow" { + if (builtin.zig_backend == .stage1) { + // stage1 doesn't support vector args + return error.SkipZigTest; + } + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if 
(builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + + const S = struct { + fn doTheTest() !void { + var result: @Vector(4, u8) = undefined; + var overflow = @shlWithOverflow(@Vector(4, u8), @Vector(4, u8){ 0, 1, 8, 255 }, @Vector(4, u3){ 7, 7, 7, 7 }, &result); + var expected: @Vector(4, bool) = .{ false, false, true, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + }; + try S.doTheTest(); + comptime try S.doTheTest(); +} From bb3532e775bb17fff1630103d4d29a650b7ad5c3 Mon Sep 17 00:00:00 2001 From: William Sengir Date: Mon, 28 Mar 2022 10:40:55 -0700 Subject: [PATCH 11/22] stage2: add more vector overflow tests --- test/behavior/vector.zig | 44 ++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index e8150dde7a..cbd8787701 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -917,10 +917,30 @@ test "@addWithOverflow" { const S = struct { fn doTheTest() !void { - var result: @Vector(4, u8) = undefined; - var overflow = @addWithOverflow(@Vector(4, u8), @Vector(4, u8){ 250, 250, 250, 250 }, @Vector(4, u8){ 0, 5, 6, 10 }, &result); - var expected: @Vector(4, bool) = .{ false, false, true, true }; - try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + { + var result: @Vector(4, u8) = undefined; + var overflow = @addWithOverflow(@Vector(4, u8), @Vector(4, u8){ 250, 250, 250, 250 }, @Vector(4, u8){ 0, 5, 6, 10 }, &result); + var expected: @Vector(4, bool) = .{ false, false, true, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + { + var result: @Vector(4, i8) = undefined; + var overflow = @addWithOverflow(@Vector(4, i8), @Vector(4, i8){ -125, -125, 125, 125 }, 
@Vector(4, i8){ -3, -4, 2, 3 }, &result); + var expected: @Vector(4, bool) = .{ false, true, false, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + { + var result: @Vector(4, u1) = undefined; + var overflow = @addWithOverflow(@Vector(4, u1), @Vector(4, u1){ 0, 0, 1, 1 }, @Vector(4, u1){ 0, 1, 0, 1 }, &result); + var expected: @Vector(4, bool) = .{ false, false, false, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + { + var result: @Vector(4, u0) = undefined; + var overflow = @addWithOverflow(@Vector(4, u0), @Vector(4, u0){ 0, 0, 0, 0 }, @Vector(4, u0){ 0, 0, 0, 0 }, &result); + var expected: @Vector(4, bool) = .{ false, false, false, false }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } } }; try S.doTheTest(); @@ -940,10 +960,18 @@ test "@subWithOverflow" { const S = struct { fn doTheTest() !void { - var result: @Vector(4, i8) = undefined; - var overflow = @subWithOverflow(@Vector(4, i8), @Vector(4, i8){ -120, -120, 120, 120 }, @Vector(4, i8){ 8, 9, -7, -8 }, &result); - var expected: @Vector(4, bool) = .{ false, true, false, true }; - try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + { + var result: @Vector(2, u8) = undefined; + var overflow = @subWithOverflow(@Vector(2, u8), @Vector(2, u8){ 5, 5 }, @Vector(2, u8){ 5, 6 }, &result); + var expected: @Vector(2, bool) = .{ false, true }; + try expect(mem.eql(bool, &@as([2]bool, overflow), &@as([2]bool, expected))); + } + { + var result: @Vector(4, i8) = undefined; + var overflow = @subWithOverflow(@Vector(4, i8), @Vector(4, i8){ -120, -120, 120, 120 }, @Vector(4, i8){ 8, 9, -7, -8 }, &result); + var expected: @Vector(4, bool) = .{ false, true, false, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } } }; try S.doTheTest(); From a5ea22d0693cf767fa47c9947399651ed3c35aaf Mon Sep 17 00:00:00 2001 From: William Sengir Date: 
Sat, 23 Apr 2022 02:54:52 -0700 Subject: [PATCH 12/22] LLVM: correctly pad result tuple of `airOverflow` --- src/codegen/llvm.zig | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 3abdccfbe2..7392b2068b 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5605,14 +5605,24 @@ pub const FuncGen = struct { const lhs_ty = self.air.typeOf(extra.lhs); const scalar_ty = lhs_ty.scalarType(); + const dest_ty = self.air.typeOfIndex(inst); const intrinsic_name = if (scalar_ty.isSignedInt()) signed_intrinsic else unsigned_intrinsic; const llvm_lhs_ty = try self.dg.llvmType(lhs_ty); + const llvm_dest_ty = try self.dg.llvmType(dest_ty); + + const tg = self.dg.module.getTarget(); const llvm_fn = self.getIntrinsic(intrinsic_name, &.{llvm_lhs_ty}); const result_struct = self.builder.buildCall(llvm_fn, &[_]*const llvm.Value{ lhs, rhs }, 2, .Fast, .Auto, ""); - return result_struct; + + const result = self.builder.buildExtractValue(result_struct, 0, ""); + const overflow_bit = self.builder.buildExtractValue(result_struct, 1, ""); + + var ty_buf: Type.Payload.Pointer = undefined; + const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, llvmFieldIndex(dest_ty, 0, tg, &ty_buf).?, ""); + return self.builder.buildInsertValue(partial, overflow_bit, llvmFieldIndex(dest_ty, 1, tg, &ty_buf).?, ""); } fn buildElementwiseCall( From 03ed0f0d2847a99823ee4ae1b1a0554b88c6544a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 12 May 2022 16:44:44 -0700 Subject: [PATCH 13/22] C backend: implement overflow arithmetic Most of the work here was additions to zig.h. The lowering code is mainly responsible for calling the correct function name depending on the operand type. Some of the compiler-rt calls here are not implemented yet and are non-standard symbols due to the C programming language not needing them. After this commit, the behavior tests with -ofmt=c are passing again. 
--- lib/std/builtin.zig | 3 +- src/codegen/c.zig | 200 ++++++----- src/link/C/zig.h | 821 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 940 insertions(+), 84 deletions(-) diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 53786c8661..d352ac29dc 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -767,8 +767,7 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace) noreturn // Until self-hosted catches up with stage1 language features, we have a simpler // default panic function: - if ((builtin.zig_backend == .stage2_llvm and builtin.link_libc) or - builtin.zig_backend == .stage2_c or + if (builtin.zig_backend == .stage2_c or builtin.zig_backend == .stage2_wasm or builtin.zig_backend == .stage2_arm or builtin.zig_backend == .stage2_aarch64 or diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 44b616c493..998271cd7f 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1766,10 +1766,10 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .mul_add => try airMulAdd(f, inst), - .add_with_overflow => try airAddWithOverflow(f, inst), - .sub_with_overflow => try airSubWithOverflow(f, inst), - .mul_with_overflow => try airMulWithOverflow(f, inst), - .shl_with_overflow => try airShlWithOverflow(f, inst), + .add_with_overflow => try airOverflow(f, inst, "addo_"), + .sub_with_overflow => try airOverflow(f, inst, "subo_"), + .mul_with_overflow => try airOverflow(f, inst, "mulo_"), + .shl_with_overflow => try airOverflow(f, inst, "shlo_"), .min => try airMinMax(f, inst, "<"), .max => try airMinMax(f, inst, ">"), @@ -2295,7 +2295,8 @@ fn airWrapOp( const bin_op = f.air.instructions.items(.data)[inst].bin_op; const inst_ty = f.air.typeOfIndex(inst); - const int_info = inst_ty.intInfo(f.object.dg.module.getTarget()); + const target = f.object.dg.module.getTarget(); + const int_info = inst_ty.intInfo(target); const bits = int_info.bits; // if it's an unsigned int with non-arbitrary bit 
size then we can just add @@ -2313,47 +2314,8 @@ fn airWrapOp( return f.fail("TODO: C backend: airWrapOp for large integers", .{}); } - var min_buf: [80]u8 = undefined; - const min = switch (int_info.signedness) { - .unsigned => "0", - else => switch (inst_ty.tag()) { - .c_short => "SHRT_MIN", - .c_int => "INT_MIN", - .c_long => "LONG_MIN", - .c_longlong => "LLONG_MIN", - .isize => "INTPTR_MIN", - else => blk: { - const val = -1 * std.math.pow(i64, 2, @intCast(i64, bits - 1)); - break :blk std.fmt.bufPrint(&min_buf, "{d}", .{val}) catch |err| switch (err) { - error.NoSpaceLeft => unreachable, - }; - }, - }, - }; - var max_buf: [80]u8 = undefined; - const max = switch (inst_ty.tag()) { - .c_short => "SHRT_MAX", - .c_ushort => "USHRT_MAX", - .c_int => "INT_MAX", - .c_uint => "UINT_MAX", - .c_long => "LONG_MAX", - .c_ulong => "ULONG_MAX", - .c_longlong => "LLONG_MAX", - .c_ulonglong => "ULLONG_MAX", - .isize => "INTPTR_MAX", - .usize => "UINTPTR_MAX", - else => blk: { - const pow_bits = switch (int_info.signedness) { - .signed => bits - 1, - .unsigned => bits, - }; - const val = std.math.pow(u64, 2, pow_bits) - 1; - break :blk std.fmt.bufPrint(&max_buf, "{}", .{val}) catch |err| switch (err) { - error.NoSpaceLeft => unreachable, - }; - }, - }; + const max = intMax(inst_ty, target, &max_buf); const lhs = try f.resolveInst(bin_op.lhs); const rhs = try f.resolveInst(bin_op.rhs); @@ -2369,10 +2331,7 @@ fn airWrapOp( .c_long => try w.writeAll("long"), .c_longlong => try w.writeAll("longlong"), else => { - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; + const prefix_byte: u8 = signAbbrev(int_info.signedness); for ([_]u8{ 8, 16, 32, 64 }) |nbits| { if (bits <= nbits) { try w.print("{c}{d}", .{ prefix_byte, nbits }); @@ -2390,6 +2349,9 @@ fn airWrapOp( try f.writeCValue(w, rhs); if (int_info.signedness == .signed) { + var min_buf: [80]u8 = undefined; + const min = intMin(inst_ty, target, &min_buf); + try w.print(", {s}", 
.{min}); } @@ -2475,10 +2437,7 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue { .c_long => try w.writeAll("long"), .c_longlong => try w.writeAll("longlong"), else => { - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; + const prefix_byte: u8 = signAbbrev(int_info.signedness); for ([_]u8{ 8, 16, 32, 64 }) |nbits| { if (bits <= nbits) { try w.print("{c}{d}", .{ prefix_byte, nbits }); @@ -2505,28 +2464,63 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue { return ret; } -fn airAddWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO add with overflow", .{}); -} +fn airOverflow(f: *Function, inst: Air.Inst.Index, op_abbrev: [*:0]const u8) !CValue { + if (f.liveness.isUnused(inst)) + return CValue.none; -fn airSubWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO sub with overflow", .{}); -} + const ty_pl = f.air.instructions.items(.data)[inst].ty_pl; + const bin_op = f.air.extraData(Air.Bin, ty_pl.payload).data; -fn airMulWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO mul with overflow", .{}); -} + const lhs = try f.resolveInst(bin_op.lhs); + const rhs = try f.resolveInst(bin_op.rhs); -fn airShlWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO shl with overflow", .{}); + const inst_ty = f.air.typeOfIndex(inst); + const scalar_ty = f.air.typeOf(bin_op.lhs).scalarType(); + const target = f.object.dg.module.getTarget(); + const int_info = scalar_ty.intInfo(target); + const w = f.object.writer(); + const c_bits = toCIntBits(int_info.bits) orelse + return f.fail("TODO: C backend: implement integer arithmetic larger than 128 bits", .{}); + + var max_buf: [80]u8 = undefined; + const max = intMax(scalar_ty, target, &max_buf); + + const ret = try 
f.allocLocal(inst_ty, .Mut); + try w.writeAll(";"); + try f.object.indent_writer.insertNewline(); + try f.writeCValue(w, ret); + + switch (int_info.signedness) { + .unsigned => { + try w.print(".field_1 = zig_{s}u{d}(", .{ + op_abbrev, c_bits, + }); + try f.writeCValue(w, lhs); + try w.writeAll(", "); + try f.writeCValue(w, rhs); + try w.writeAll(", &"); + try f.writeCValue(w, ret); + try w.print(".field_0, {s}", .{max}); + }, + .signed => { + var min_buf: [80]u8 = undefined; + const min = intMin(scalar_ty, target, &min_buf); + + try w.print(".field_1 = zig_{s}i{d}(", .{ + op_abbrev, c_bits, + }); + try f.writeCValue(w, lhs); + try w.writeAll(", "); + try f.writeCValue(w, rhs); + try w.writeAll(", &"); + try f.writeCValue(w, ret); + try w.print(".field_0, {s}, {s}", .{ min, max }); + }, + } + + try w.writeAll(");"); + try f.object.indent_writer.insertNewline(); + return ret; } fn airNot(f: *Function, inst: Air.Inst.Index) !CValue { @@ -3571,11 +3565,7 @@ fn airBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u8) !C return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{}); try writer.print(" = zig_{s}_", .{fn_name}); - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; - try writer.print("{c}{d}(", .{ prefix_byte, c_bits }); + try writer.print("{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits }); try f.writeCValue(writer, try f.resolveInst(operand)); try writer.print(", {d});\n", .{int_info.bits}); return local; @@ -3596,11 +3586,7 @@ fn airBinOpBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u const int_info = lhs_ty.intInfo(target); const c_bits = toCIntBits(int_info.bits) orelse return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{}); - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; - try writer.print(" = zig_{s}_{c}{d}", .{ fn_name, prefix_byte, c_bits }); + try 
writer.print(" = zig_{s}_{c}{d}", .{ fn_name, signAbbrev(int_info.signedness), c_bits }); } else if (lhs_ty.isRuntimeFloat()) { const c_bits = lhs_ty.floatBits(target); try writer.print(" = zig_{s}_f{d}", .{ fn_name, c_bits }); @@ -4085,3 +4071,53 @@ fn toCIntBits(zig_bits: u32) ?u32 { } return null; } + +fn signAbbrev(signedness: std.builtin.Signedness) u8 { + return switch (signedness) { + .signed => 'i', + .unsigned => 'u', + }; +} + +fn intMax(ty: Type, target: std.Target, buf: []u8) []const u8 { + switch (ty.tag()) { + .c_short => return "SHRT_MAX", + .c_ushort => return "USHRT_MAX", + .c_int => return "INT_MAX", + .c_uint => return "UINT_MAX", + .c_long => return "LONG_MAX", + .c_ulong => return "ULONG_MAX", + .c_longlong => return "LLONG_MAX", + .c_ulonglong => return "ULLONG_MAX", + else => { + const int_info = ty.intInfo(target); + const rhs = @intCast(u7, int_info.bits - @boolToInt(int_info.signedness == .signed)); + const val = (@as(u128, 1) << rhs) - 1; + // TODO make this integer literal have a suffix if necessary (such as "ull") + return std.fmt.bufPrint(buf, "{}", .{val}) catch |err| switch (err) { + error.NoSpaceLeft => unreachable, + }; + }, + } +} + +fn intMin(ty: Type, target: std.Target, buf: []u8) []const u8 { + switch (ty.tag()) { + .c_short => return "SHRT_MIN", + .c_int => return "INT_MIN", + .c_long => return "LONG_MIN", + .c_longlong => return "LLONG_MIN", + else => { + const int_info = ty.intInfo(target); + assert(int_info.signedness == .signed); + const val = v: { + if (int_info.bits == 0) break :v 0; + const rhs = @intCast(u7, (int_info.bits - 1)); + break :v -(@as(i128, 1) << rhs); + }; + return std.fmt.bufPrint(buf, "{d}", .{val}) catch |err| switch (err) { + error.NoSpaceLeft => unreachable, + }; + }, + } +} diff --git a/src/link/C/zig.h b/src/link/C/zig.h index 85c7856d2b..6bafee987b 100644 --- a/src/link/C/zig.h +++ b/src/link/C/zig.h @@ -165,8 +165,24 @@ #define int128_t __int128 #define uint128_t unsigned __int128 +#define 
UINT128_MAX ((uint128_t)(0xffffffffffffffffull) | 0xffffffffffffffffull) ZIG_EXTERN_C void *memcpy (void *ZIG_RESTRICT, const void *ZIG_RESTRICT, size_t); ZIG_EXTERN_C void *memset (void *, int, size_t); +ZIG_EXTERN_C int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow); +ZIG_EXTERN_C int128_t __addoti4(int128_t lhs, int128_t rhs, int *overflow); +ZIG_EXTERN_C uint64_t __uaddodi4(uint64_t lhs, uint64_t rhs, int *overflow); +ZIG_EXTERN_C uint128_t __uaddoti4(uint128_t lhs, uint128_t rhs, int *overflow); +ZIG_EXTERN_C int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow); +ZIG_EXTERN_C int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow); +ZIG_EXTERN_C int128_t __suboti4(int128_t lhs, int128_t rhs, int *overflow); +ZIG_EXTERN_C uint32_t __usubosi4(uint32_t lhs, uint32_t rhs, int *overflow); +ZIG_EXTERN_C uint64_t __usubodi4(uint64_t lhs, uint64_t rhs, int *overflow); +ZIG_EXTERN_C uint128_t __usuboti4(uint128_t lhs, uint128_t rhs, int *overflow); +ZIG_EXTERN_C int64_t __mulodi4(int64_t lhs, int64_t rhs, int *overflow); +ZIG_EXTERN_C int128_t __muloti4(int128_t lhs, int128_t rhs, int *overflow); +ZIG_EXTERN_C uint64_t __umulodi4(uint64_t lhs, uint64_t rhs, int *overflow); +ZIG_EXTERN_C uint128_t __umuloti4(uint128_t lhs, uint128_t rhs, int *overflow); + static inline uint8_t zig_addw_u8(uint8_t lhs, uint8_t rhs, uint8_t max) { uint8_t thresh = max - rhs; @@ -396,6 +412,811 @@ static inline long long zig_subw_longlong(long long lhs, long long rhs, long lon return (long long)(((unsigned long long)lhs) - ((unsigned long long)rhs)); } +static inline bool zig_addo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { +#if defined(__GNUC__) && INT8_MAX == INT_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_sadd_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT8_MAX == LONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_saddl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT8_MAX == 
LLONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_saddll_overflow(lhs, rhs, res); + } +#endif + int16_t big_result = (int16_t)lhs + (int16_t)rhs; + if (big_result > max) { + *res = big_result - ((int16_t)max - (int16_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int16_t)max - (int16_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_addo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { +#if defined(__GNUC__) && INT16_MAX == INT_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_sadd_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT16_MAX == LONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_saddl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_saddll_overflow(lhs, rhs, res); + } +#endif + int32_t big_result = (int32_t)lhs + (int32_t)rhs; + if (big_result > max) { + *res = big_result - ((int32_t)max - (int32_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int32_t)max - (int32_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_addo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { +#if defined(__GNUC__) && INT32_MAX == INT_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_sadd_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT32_MAX == LONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_saddl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_saddll_overflow(lhs, rhs, res); + } +#endif + int64_t big_result = (int64_t)lhs + (int64_t)rhs; + if (big_result > max) { + *res = big_result - ((int64_t)max - (int64_t)min); + return true; + } + if (big_result < min) { + *res = 
big_result + ((int64_t)max - (int64_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_addo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { + bool overflow; +#if defined(__GNUC__) && INT64_MAX == INT_MAX + overflow = __builtin_sadd_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT64_MAX == LONG_MAX + overflow = __builtin_saddl_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX + overflow = __builtin_saddll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __addodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_addo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { + bool overflow; +#if defined(__GNUC__) && INT128_MAX == INT_MAX + overflow = __builtin_sadd_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT128_MAX == LONG_MAX + overflow = __builtin_saddl_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX + overflow = __builtin_saddll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __addoti4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_addo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { +#if defined(__GNUC__) && UINT8_MAX == UINT_MAX + if (max == UINT8_MAX) { + return __builtin_uadd_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX + if (max == UINT8_MAX) { + return __builtin_uaddl_overflow(lhs, rhs, 
res); + } +#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX + if (max == UINT8_MAX) { + return __builtin_uaddll_overflow(lhs, rhs, res); + } +#endif + uint16_t big_result = (uint16_t)lhs + (uint16_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint16_t zig_addo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { +#if defined(__GNUC__) && UINT16_MAX == UINT_MAX + if (max == UINT16_MAX) { + return __builtin_uadd_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX + if (max == UINT16_MAX) { + return __builtin_uaddl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX + if (max == UINT16_MAX) { + return __builtin_uaddll_overflow(lhs, rhs, res); + } +#endif + uint32_t big_result = (uint32_t)lhs + (uint32_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint32_t zig_addo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { +#if defined(__GNUC__) && UINT32_MAX == UINT_MAX + if (max == UINT32_MAX) { + return __builtin_uadd_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX + if (max == UINT32_MAX) { + return __builtin_uaddl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX + if (max == UINT32_MAX) { + return __builtin_uaddll_overflow(lhs, rhs, res); + } +#endif + uint64_t big_result = (uint64_t)lhs + (uint64_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint64_t zig_addo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { + bool overflow; +#if defined(__GNUC__) && UINT64_MAX == UINT_MAX + overflow = __builtin_uadd_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX + overflow = __builtin_uaddl_overflow(lhs, rhs, res); +#elif 
defined(__GNUC__) && UINT64_MAX == ULLONG_MAX + overflow = __builtin_uaddll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __uaddodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (*res > max && !overflow) { + *res -= max - 1; + return true; + } + return overflow; +} + +static inline uint128_t zig_addo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) { + bool overflow; + *res = __uaddoti4(lhs, rhs, &overflow); + if (*res > max && !overflow) { + *res -= max - 1; + return true; + } + return overflow; +} + +static inline bool zig_subo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { +#if defined(__GNUC__) && INT8_MAX == INT_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_ssub_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT8_MAX == LONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_ssubl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_ssubll_overflow(lhs, rhs, res); + } +#endif + int16_t big_result = (int16_t)lhs - (int16_t)rhs; + if (big_result > max) { + *res = big_result - ((int16_t)max - (int16_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int16_t)max - (int16_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_subo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { +#if defined(__GNUC__) && INT16_MAX == INT_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_ssub_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT16_MAX == LONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_ssubl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_ssubll_overflow(lhs, rhs, res); + } +#endif + int32_t big_result = (int32_t)lhs - 
(int32_t)rhs; + if (big_result > max) { + *res = big_result - ((int32_t)max - (int32_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int32_t)max - (int32_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_subo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { +#if defined(__GNUC__) && INT32_MAX == INT_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_ssub_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT32_MAX == LONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_ssubl_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_ssubll_overflow(lhs, rhs, res); + } +#endif + int64_t big_result = (int64_t)lhs - (int64_t)rhs; + if (big_result > max) { + *res = big_result - ((int64_t)max - (int64_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int64_t)max - (int64_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_subo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { + bool overflow; +#if defined(__GNUC__) && INT64_MAX == INT_MAX + overflow = __builtin_ssub_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT64_MAX == LONG_MAX + overflow = __builtin_ssubl_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX + overflow = __builtin_ssubll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __subodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_subo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { + bool overflow; 
+#if defined(__GNUC__) && INT128_MAX == INT_MAX + overflow = __builtin_ssub_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT128_MAX == LONG_MAX + overflow = __builtin_ssubl_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX + overflow = __builtin_ssubll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __suboti4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_subo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { +#if defined(__GNUC__) && UINT8_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, res); +#endif + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; +} + +static inline uint16_t zig_subo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { +#if defined(__GNUC__) && UINT16_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, res); +#endif + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; +} + +static inline uint32_t zig_subo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { + if (max == UINT32_MAX) { +#if defined(__GNUC__) && UINT32_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, 
rhs, res); +#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, res); +#endif + int int_overflow; + *res = __usubosi4(lhs, rhs, &int_overflow); + return int_overflow != 0; + } else { + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; + } +} + +static inline uint64_t zig_subo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { + if (max == UINT64_MAX) { +#if defined(__GNUC__) && UINT64_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __usubodi4(lhs, rhs, &int_overflow); + return int_overflow != 0; +#endif + } else { + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; + } +} + +static inline uint128_t zig_subo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) { + if (max == UINT128_MAX) { + int int_overflow; + *res = __usuboti4(lhs, rhs, &int_overflow); + return int_overflow != 0; + } else { + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; + } +} + +static inline bool zig_mulo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { +#if defined(__GNUC__) && INT8_MAX == INT_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_smul_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT8_MAX == LONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_smull_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_smulll_overflow(lhs, rhs, res); + } +#endif + int16_t big_result = (int16_t)lhs * (int16_t)rhs; + if (big_result > max) { + *res = big_result - 
((int16_t)max - (int16_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int16_t)max - (int16_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_mulo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { +#if defined(__GNUC__) && INT16_MAX == INT_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_smul_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT16_MAX == LONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_smull_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_smulll_overflow(lhs, rhs, res); + } +#endif + int32_t big_result = (int32_t)lhs * (int32_t)rhs; + if (big_result > max) { + *res = big_result - ((int32_t)max - (int32_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int32_t)max - (int32_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_mulo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { +#if defined(__GNUC__) && INT32_MAX == INT_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_smul_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT32_MAX == LONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_smull_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_smulll_overflow(lhs, rhs, res); + } +#endif + int64_t big_result = (int64_t)lhs * (int64_t)rhs; + if (big_result > max) { + *res = big_result - ((int64_t)max - (int64_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int64_t)max - (int64_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_mulo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { + bool 
overflow; +#if defined(__GNUC__) && INT64_MAX == INT_MAX + overflow = __builtin_smul_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT64_MAX == LONG_MAX + overflow = __builtin_smull_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX + overflow = __builtin_smulll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __mulodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_mulo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { + bool overflow; +#if defined(__GNUC__) && INT128_MAX == INT_MAX + overflow = __builtin_smul_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT128_MAX == LONG_MAX + overflow = __builtin_smull_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX + overflow = __builtin_smulll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __muloti4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_mulo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { +#if defined(__GNUC__) && UINT8_MAX == UINT_MAX + if (max == UINT8_MAX) { + return __builtin_umul_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX + if (max == UINT8_MAX) { + return __builtin_umull_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX + if (max == UINT8_MAX) { + return __builtin_umulll_overflow(lhs, rhs, res); + } +#endif + uint16_t big_result = (uint16_t)lhs * (uint16_t)rhs; + if 
(big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint16_t zig_mulo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { +#if defined(__GNUC__) && UINT16_MAX == UINT_MAX + if (max == UINT16_MAX) { + return __builtin_umul_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX + if (max == UINT16_MAX) { + return __builtin_umull_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX + if (max == UINT16_MAX) { + return __builtin_umulll_overflow(lhs, rhs, res); + } +#endif + uint32_t big_result = (uint32_t)lhs * (uint32_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint32_t zig_mulo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { +#if defined(__GNUC__) && UINT32_MAX == UINT_MAX + if (max == UINT32_MAX) { + return __builtin_umul_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX + if (max == UINT32_MAX) { + return __builtin_umull_overflow(lhs, rhs, res); + } +#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX + if (max == UINT32_MAX) { + return __builtin_umulll_overflow(lhs, rhs, res); + } +#endif + uint64_t big_result = (uint64_t)lhs * (uint64_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint64_t zig_mulo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { + bool overflow; +#if defined(__GNUC__) && UINT64_MAX == UINT_MAX + overflow = __builtin_umul_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX + overflow = __builtin_umull_overflow(lhs, rhs, res); +#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX + overflow = __builtin_umulll_overflow(lhs, rhs, res); +#else + int int_overflow; + *res = __umulodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; 
+#endif + if (*res > max && !overflow) { + *res -= max - 1; + return true; + } + return overflow; +} + +static inline uint128_t zig_mulo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) { + int overflow; + *res = __umuloti4(lhs, rhs, &overflow); + if (*res > max && overflow == 0) { + *res -= max - 1; + return true; + } + return overflow != 0; +} + +static inline bool zig_shlo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { + int16_t big_result = (int16_t)lhs << (int16_t)rhs; + if (big_result > max) { + *res = big_result - ((int16_t)max - (int16_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int16_t)max - (int16_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_shlo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { + int32_t big_result = (int32_t)lhs << (int32_t)rhs; + if (big_result > max) { + *res = big_result - ((int32_t)max - (int32_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int32_t)max - (int32_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_shlo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { + int64_t big_result = (int64_t)lhs << (int64_t)rhs; + if (big_result > max) { + *res = big_result - ((int64_t)max - (int64_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int64_t)max - (int64_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_shlo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { + int overflow; + *res = __shlodi4(lhs, rhs, &overflow); + if (overflow == 0) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow != 0; +} + +static inline bool zig_shlo_i128(int128_t lhs, int128_t rhs, 
int128_t *res, int128_t min, int128_t max) { + int overflow; + *res = __shloti4(lhs, rhs, &overflow); + if (overflow == 0) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow != 0; +} + +static inline bool zig_shlo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { + uint16_t big_result = (uint16_t)lhs << (uint16_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint16_t zig_shlo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { + uint32_t big_result = (uint32_t)lhs << (uint32_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint32_t zig_shlo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { + uint64_t big_result = (uint64_t)lhs << (uint64_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint64_t zig_shlo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { + int overflow; + *res = __ushlodi4(lhs, rhs, &overflow); + if (*res > max && overflow == 0) { + *res -= max - 1; + return true; + } + return overflow != 0; +} + +static inline uint128_t zig_shlo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) { + int overflow; + *res = __ushloti4(lhs, rhs, &overflow); + if (*res > max && overflow == 0) { + *res -= max - 1; + return true; + } + return overflow != 0; +} + static inline float zig_bitcast_f32_u32(uint32_t arg) { float dest; memcpy(&dest, &arg, sizeof dest); From 316bf4fce5e069fa3ec1c2d9688e5b332eb4111c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 12 May 2022 19:34:09 -0700 Subject: [PATCH 14/22] disable 5 failing stage2_wasm tests --- test/behavior/union.zig | 2 ++ 
test/behavior/while.zig | 3 +++ 2 files changed, 5 insertions(+) diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 8315ea8a22..a8e280b258 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -212,6 +212,7 @@ test "union with specified enum tag" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; try doTest(); comptime try doTest(); @@ -221,6 +222,7 @@ test "packed union generates correctly aligned type" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage1) return error.SkipZigTest; const U = packed union { diff --git a/test/behavior/while.zig b/test/behavior/while.zig index 71f1d253e9..d447b876b7 100644 --- a/test/behavior/while.zig +++ b/test/behavior/while.zig @@ -146,6 +146,7 @@ test "while with optional as condition" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; numbers_left = 10; var sum: i32 = 0; @@ -159,6 +160,7 @@ test "while with optional as condition with else" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; numbers_left = 10; var sum: i32 = 0; @@ -177,6 +179,7 @@ test "while with error union 
condition" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; numbers_left = 10; var sum: i32 = 0; From b94d165b69f2743d779a04b1719382207e341596 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 13 May 2022 09:04:07 +0200 Subject: [PATCH 15/22] x64: fix capacity prealloc limit in lowerToMrEnc helper --- src/arch/x86_64/Emit.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 518635b806..57100abc0f 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -1896,7 +1896,7 @@ fn lowerToMrEnc( const opc = getOpCode(tag, .mr, reg.size() == 8 or reg_or_mem.size() == 8).?; switch (reg_or_mem) { .register => |dst_reg| { - const encoder = try Encoder.init(code, 3); + const encoder = try Encoder.init(code, 4); if (dst_reg.size() == 16) { encoder.prefix16BitMode(); } From 0a2d3d41556a3bbe836dafa5321439fa6da9b464 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 13 May 2022 19:51:14 +0200 Subject: [PATCH 16/22] wasm: Improve overflow add/sub for ints <= 64bits The implementation for add_with_overflow and sub_with_overflow is now a lot more robust and takes account for signed integers and arbitrary integer bitsizes. The final output is equal to that of the LLVM backend. 
--- src/arch/wasm/CodeGen.zig | 79 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 3 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 947174aaed..4bebf05e70 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1450,8 +1450,8 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .min => self.airMaxMin(inst, .min), .mul_add => self.airMulAdd(inst), - .add_with_overflow => self.airBinOpOverflow(inst, .add), - .sub_with_overflow => self.airBinOpOverflow(inst, .sub), + .add_with_overflow => self.airAddSubWithOverflow(inst, .add), + .sub_with_overflow => self.airAddSubWithOverflow(inst, .sub), .shl_with_overflow => self.airBinOpOverflow(inst, .shl), .mul_with_overflow => self.airMulWithOverflow(inst), @@ -3988,7 +3988,7 @@ fn airBinOpOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue const cmp_res = try self.cmp(rhs, diff, lhs_ty, .gt); try self.emitWValue(cmp_res); try self.addLabel(.local_set, overflow_bit.local); - } else if (int_info.signedness == .unsigned and op == .sub) { + } else if (op == .sub) { const cmp_res = try self.cmp(lhs, rhs, lhs_ty, .lt); try self.emitWValue(cmp_res); try self.addLabel(.local_set, overflow_bit.local); @@ -4050,6 +4050,79 @@ fn airBinOpOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue return result_ptr; } +fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue { + assert(op == .add or op == .sub); + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const lhs_op = try self.resolveInst(extra.lhs); + const rhs_op = try self.resolveInst(extra.rhs); + const lhs_ty = self.air.typeOf(extra.lhs); + + if (lhs_ty.zigTypeTag() == .Vector) { + return self.fail("TODO: Implement overflow arithmetic for vectors", .{}); + } + + const int_info = lhs_ty.intInfo(self.target); + const is_signed = int_info.signedness == 
.signed; + const wasm_bits = toWasmBits(int_info.bits) orelse { + return self.fail("TODO: Implement sub_with_overflow for integer bitsize: {d}", .{int_info.bits}); + }; + + if (wasm_bits == 128) { + return self.fail("TODO: Implement sub_with_overflow for 128 bit integers", .{}); + } + + const zero = switch (wasm_bits) { + 32 => WValue{ .imm32 = 0 }, + 64 => WValue{ .imm64 = 0 }, + else => unreachable, + }; + const shift_amt = wasm_bits - int_info.bits; + const shift_val = switch (wasm_bits) { + 32 => WValue{ .imm32 = shift_amt }, + 64 => WValue{ .imm64 = shift_amt }, + else => unreachable, + }; + + // for signed integers, we first apply signed shifts by the difference in bits + // to get the signed value, as we store it internally as 2's complement. + const lhs = if (wasm_bits != int_info.bits and is_signed) blk: { + const shl = try self.binOp(lhs_op, shift_val, lhs_ty, .shl); + break :blk try self.binOp(shl, shift_val, lhs_ty, .shr); + } else lhs_op; + const rhs = if (wasm_bits != int_info.bits and is_signed) blk: { + const shl = try self.binOp(rhs_op, shift_val, lhs_ty, .shl); + break :blk try self.binOp(shl, shift_val, lhs_ty, .shr); + } else rhs_op; + + const bin_op = try self.binOp(lhs, rhs, lhs_ty, op); + const result = if (wasm_bits != int_info.bits) blk: { + break :blk try self.wrapOperand(bin_op, lhs_ty); + } else bin_op; + + const cmp_op: std.math.CompareOperator = if (op == .sub) .gt else .lt; + const overflow_bit: WValue = if (is_signed) blk: { + if (wasm_bits == int_info.bits) { + const cmp_zero = try self.cmp(rhs, zero, lhs_ty, cmp_op); + const lt = try self.cmp(bin_op, lhs, lhs_ty, .lt); + break :blk try self.binOp(cmp_zero, lt, Type.u32, .xor); // result of cmp_zero and lt is always 32bit + } + const shl = try self.binOp(bin_op, shift_val, lhs_ty, .shl); + const shr = try self.binOp(shl, shift_val, lhs_ty, .shr); + break :blk try self.cmp(shr, bin_op, lhs_ty, .neq); + } else if (wasm_bits == int_info.bits) + try self.cmp(bin_op, lhs, lhs_ty, cmp_op) 
+ else + try self.cmp(bin_op, result, lhs_ty, .neq); + + const result_ptr = try self.allocStack(self.air.typeOfIndex(inst)); + try self.store(result_ptr, result, lhs_ty, 0); + const offset = @intCast(u32, lhs_ty.abiSize(self.target)); + try self.store(result_ptr, overflow_bit, Type.initTag(.u1), offset); + + return result_ptr; +} + fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!WValue { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; From 160aa4c11dcb0413796d08fd623ce7bbeabaf04b Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 13 May 2022 21:25:23 +0200 Subject: [PATCH 17/22] wasm: Improve shl_with_overflow This re-implements the shl_with_overflow operation from scratch, making it a lot more robust and outputs the equal code to the LLVM backend. --- src/arch/wasm/CodeGen.zig | 164 ++++++++++++-------------------------- test/behavior/union.zig | 2 - test/behavior/while.zig | 3 - 3 files changed, 49 insertions(+), 120 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 4bebf05e70..8e84b7d1fe 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1452,7 +1452,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .add_with_overflow => self.airAddSubWithOverflow(inst, .add), .sub_with_overflow => self.airAddSubWithOverflow(inst, .sub), - .shl_with_overflow => self.airBinOpOverflow(inst, .shl), + .shl_with_overflow => self.airShlWithOverflow(inst), .mul_with_overflow => self.airMulWithOverflow(inst), .clz => self.airClz(inst), @@ -3941,115 +3941,6 @@ fn airPtrSliceFieldPtr(self: *Self, inst: Air.Inst.Index, offset: u32) InnerErro return self.buildPointerOffset(slice_ptr, offset, .new); } -fn airBinOpOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue { - if (self.liveness.isUnused(inst)) return WValue{ .none = {} }; - - const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; - const 
extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - const lhs = try self.resolveInst(extra.lhs); - const rhs = try self.resolveInst(extra.rhs); - const lhs_ty = self.air.typeOf(extra.lhs); - - if (lhs_ty.zigTypeTag() == .Vector) { - return self.fail("TODO: Implement overflow arithmetic for vectors", .{}); - } - - // We store the bit if it's overflowed or not in this. As it's zero-initialized - // we only need to update it if an overflow (or underflow) occured. - const overflow_bit = try self.allocLocal(Type.initTag(.u1)); - const int_info = lhs_ty.intInfo(self.target); - const wasm_bits = toWasmBits(int_info.bits) orelse { - return self.fail("TODO: Implement overflow arithmetic for integer bitsize: {d}", .{int_info.bits}); - }; - - const zero = switch (wasm_bits) { - 32 => WValue{ .imm32 = 0 }, - 64 => WValue{ .imm64 = 0 }, - else => unreachable, - }; - const int_max = (@as(u65, 1) << @intCast(u7, int_info.bits - @boolToInt(int_info.signedness == .signed))) - 1; - const int_max_wvalue = switch (wasm_bits) { - 32 => WValue{ .imm32 = @intCast(u32, int_max) }, - 64 => WValue{ .imm64 = @intCast(u64, int_max) }, - else => unreachable, - }; - const int_min = if (int_info.signedness == .unsigned) - @as(i64, 0) - else - -@as(i64, 1) << @intCast(u6, int_info.bits - 1); - const int_min_wvalue = switch (wasm_bits) { - 32 => WValue{ .imm32 = @bitCast(u32, @intCast(i32, int_min)) }, - 64 => WValue{ .imm64 = @bitCast(u64, int_min) }, - else => unreachable, - }; - - if (int_info.signedness == .unsigned and op == .add) { - const diff = try self.binOp(int_max_wvalue, lhs, lhs_ty, .sub); - const cmp_res = try self.cmp(rhs, diff, lhs_ty, .gt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } else if (op == .sub) { - const cmp_res = try self.cmp(lhs, rhs, lhs_ty, .lt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } else if (int_info.signedness == .signed and op != .shl) { - // for overflow, we 
first check if lhs is > 0 (or lhs < 0 in case of subtraction). If not, we will not overflow. - // We first create an outer block, where we handle overflow. - // Then we create an inner block, where underflow is handled. - try self.startBlock(.block, wasm.block_empty); - try self.startBlock(.block, wasm.block_empty); - { - try self.emitWValue(lhs); - const cmp_result = try self.cmp(lhs, zero, lhs_ty, .lt); - try self.emitWValue(cmp_result); - } - try self.addLabel(.br_if, 0); // break to outer block, and handle underflow - - // handle overflow - { - const diff = try self.binOp(int_max_wvalue, lhs, lhs_ty, .sub); - const cmp_res = try self.cmp(rhs, diff, lhs_ty, if (op == .add) .gt else .lt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } - try self.addLabel(.br, 1); // break from blocks, and continue regular flow. - try self.endBlock(); - - // handle underflow - { - const diff = try self.binOp(int_min_wvalue, lhs, lhs_ty, .sub); - const cmp_res = try self.cmp(rhs, diff, lhs_ty, if (op == .add) .lt else .gt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } - try self.endBlock(); - } - - const bin_op = if (op == .shl) blk: { - const tmp_val = try self.binOp(lhs, rhs, lhs_ty, op); - const cmp_res = try self.cmp(tmp_val, int_max_wvalue, lhs_ty, .gt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - - try self.emitWValue(tmp_val); - try self.emitWValue(int_max_wvalue); - switch (wasm_bits) { - 32 => try self.addTag(.i32_and), - 64 => try self.addTag(.i64_and), - else => unreachable, - } - try self.addLabel(.local_set, tmp_val.local); - break :blk tmp_val; - } else try self.wrapBinOp(lhs, rhs, lhs_ty, op); - - const result_ptr = try self.allocStack(self.air.typeOfIndex(inst)); - try self.store(result_ptr, bin_op, lhs_ty, 0); - const offset = @intCast(u32, lhs_ty.abiSize(self.target)); - try self.store(result_ptr, overflow_bit, Type.initTag(.u1), 
offset); - - return result_ptr; -} - fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue { assert(op == .add or op == .sub); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; @@ -4065,13 +3956,9 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!W const int_info = lhs_ty.intInfo(self.target); const is_signed = int_info.signedness == .signed; const wasm_bits = toWasmBits(int_info.bits) orelse { - return self.fail("TODO: Implement sub_with_overflow for integer bitsize: {d}", .{int_info.bits}); + return self.fail("TODO: Implement {{add/sub}}_with_overflow for integer bitsize: {d}", .{int_info.bits}); }; - if (wasm_bits == 128) { - return self.fail("TODO: Implement sub_with_overflow for 128 bit integers", .{}); - } - const zero = switch (wasm_bits) { 32 => WValue{ .imm32 = 0 }, 64 => WValue{ .imm64 = 0 }, @@ -4123,6 +4010,53 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!W return result_ptr; } +fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!WValue { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const lhs = try self.resolveInst(extra.lhs); + const rhs = try self.resolveInst(extra.rhs); + const lhs_ty = self.air.typeOf(extra.lhs); + + if (lhs_ty.zigTypeTag() == .Vector) { + return self.fail("TODO: Implement overflow arithmetic for vectors", .{}); + } + + const int_info = lhs_ty.intInfo(self.target); + const is_signed = int_info.signedness == .signed; + const wasm_bits = toWasmBits(int_info.bits) orelse { + return self.fail("TODO: Implement shl_with_overflow for integer bitsize: {d}", .{int_info.bits}); + }; + + const shl = try self.binOp(lhs, rhs, lhs_ty, .shl); + const result = if (wasm_bits != int_info.bits) blk: { + break :blk try self.wrapOperand(shl, lhs_ty); + } else shl; + + const overflow_bit = if (wasm_bits != int_info.bits and is_signed) blk: { + 
const shift_amt = wasm_bits - int_info.bits; + const shift_val = switch (wasm_bits) { + 32 => WValue{ .imm32 = shift_amt }, + 64 => WValue{ .imm64 = shift_amt }, + else => unreachable, + }; + + const secondary_shl = try self.binOp(shl, shift_val, lhs_ty, .shl); + const initial_shr = try self.binOp(secondary_shl, shift_val, lhs_ty, .shr); + const shr = try self.wrapBinOp(initial_shr, rhs, lhs_ty, .shr); + break :blk try self.cmp(lhs, shr, lhs_ty, .neq); + } else blk: { + const shr = try self.binOp(result, rhs, lhs_ty, .shr); + break :blk try self.cmp(lhs, shr, lhs_ty, .neq); + }; + + const result_ptr = try self.allocStack(self.air.typeOfIndex(inst)); + try self.store(result_ptr, result, lhs_ty, 0); + const offset = @intCast(u32, lhs_ty.abiSize(self.target)); + try self.store(result_ptr, overflow_bit, Type.initTag(.u1), offset); + + return result_ptr; +} + fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!WValue { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; diff --git a/test/behavior/union.zig b/test/behavior/union.zig index a8e280b258..8315ea8a22 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -212,7 +212,6 @@ test "union with specified enum tag" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; try doTest(); comptime try doTest(); @@ -222,7 +221,6 @@ test "packed union generates correctly aligned type" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage1) return 
error.SkipZigTest; const U = packed union { diff --git a/test/behavior/while.zig b/test/behavior/while.zig index d447b876b7..71f1d253e9 100644 --- a/test/behavior/while.zig +++ b/test/behavior/while.zig @@ -146,7 +146,6 @@ test "while with optional as condition" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; numbers_left = 10; var sum: i32 = 0; @@ -160,7 +159,6 @@ test "while with optional as condition with else" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; numbers_left = 10; var sum: i32 = 0; @@ -179,7 +177,6 @@ test "while with error union condition" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; numbers_left = 10; var sum: i32 = 0; From a84be7e988c91606bc42e1e1c8a34bbdcdb8a3f1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 13 May 2022 21:06:53 -0700 Subject: [PATCH 18/22] zig.h: improve overflow shl * zig_addo_u128: fix type-o * redo the shift-left overflow inline functions. no need to depend on compiler-rt. 
--- src/link/C/zig.h | 198 +++++++++++++++++------------------------------ 1 file changed, 73 insertions(+), 125 deletions(-) diff --git a/src/link/C/zig.h b/src/link/C/zig.h index 6bafee987b..e3a0447c05 100644 --- a/src/link/C/zig.h +++ b/src/link/C/zig.h @@ -633,13 +633,13 @@ static inline uint64_t zig_addo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, u } static inline uint128_t zig_addo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) { - bool overflow; + int overflow; *res = __uaddoti4(lhs, rhs, &overflow); - if (*res > max && !overflow) { + if (*res > max && overflow == 0) { *res -= max - 1; return true; } - return overflow; + return overflow != 0; } static inline bool zig_subo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { @@ -1095,128 +1095,6 @@ static inline uint128_t zig_mulo_u128(uint128_t lhs, uint128_t rhs, uint128_t *r return overflow != 0; } -static inline bool zig_shlo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { - int16_t big_result = (int16_t)lhs << (int16_t)rhs; - if (big_result > max) { - *res = big_result - ((int16_t)max - (int16_t)min); - return true; - } - if (big_result < min) { - *res = big_result + ((int16_t)max - (int16_t)min); - return true; - } - *res = big_result; - return false; -} - -static inline bool zig_shlo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { - int32_t big_result = (int32_t)lhs << (int32_t)rhs; - if (big_result > max) { - *res = big_result - ((int32_t)max - (int32_t)min); - return true; - } - if (big_result < min) { - *res = big_result + ((int32_t)max - (int32_t)min); - return true; - } - *res = big_result; - return false; -} - -static inline bool zig_shlo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { - int64_t big_result = (int64_t)lhs << (int64_t)rhs; - if (big_result > max) { - *res = big_result - ((int64_t)max - (int64_t)min); - return true; - } - if (big_result < min) { - *res = big_result + ((int64_t)max - 
(int64_t)min); - return true; - } - *res = big_result; - return false; -} - -static inline bool zig_shlo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { - int overflow; - *res = __shlodi4(lhs, rhs, &overflow); - if (overflow == 0) { - if (*res > max) { - // TODO adjust the result to be the truncated bits - return true; - } else if (*res < min) { - // TODO adjust the result to be the truncated bits - return true; - } - } - return overflow != 0; -} - -static inline bool zig_shlo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { - int overflow; - *res = __shloti4(lhs, rhs, &overflow); - if (overflow == 0) { - if (*res > max) { - // TODO adjust the result to be the truncated bits - return true; - } else if (*res < min) { - // TODO adjust the result to be the truncated bits - return true; - } - } - return overflow != 0; -} - -static inline bool zig_shlo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { - uint16_t big_result = (uint16_t)lhs << (uint16_t)rhs; - if (big_result > max) { - *res = big_result - max - 1; - return true; - } - *res = big_result; - return false; -} - -static inline uint16_t zig_shlo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { - uint32_t big_result = (uint32_t)lhs << (uint32_t)rhs; - if (big_result > max) { - *res = big_result - max - 1; - return true; - } - *res = big_result; - return false; -} - -static inline uint32_t zig_shlo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { - uint64_t big_result = (uint64_t)lhs << (uint64_t)rhs; - if (big_result > max) { - *res = big_result - max - 1; - return true; - } - *res = big_result; - return false; -} - -static inline uint64_t zig_shlo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { - int overflow; - *res = __ushlodi4(lhs, rhs, &overflow); - if (*res > max && overflow == 0) { - *res -= max - 1; - return true; - } - return overflow != 0; -} - -static inline uint128_t zig_shlo_u128(uint128_t lhs, 
uint128_t rhs, uint128_t *res, uint128_t max) { - int overflow; - *res = __ushloti4(lhs, rhs, &overflow); - if (*res > max && overflow == 0) { - *res -= max - 1; - return true; - } - return overflow != 0; -} - static inline float zig_bitcast_f32_u32(uint32_t arg) { float dest; memcpy(&dest, &arg, sizeof dest); @@ -1429,6 +1307,76 @@ static inline int zig_popcount_u128(uint128_t value, uint8_t zig_type_bit_width) #define zig_popcount_i128 zig_popcount_u128 +static inline bool zig_shlo_i8(int8_t lhs, int8_t rhs, int8_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_i8(lhs, bits) >= rhs) return false; + *res &= UINT8_MAX >> (8 - bits); + return true; +} + +static inline bool zig_shlo_i16(int16_t lhs, int16_t rhs, int16_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_i16(lhs, bits) >= rhs) return false; + *res &= UINT16_MAX >> (16 - bits); + return true; +} + +static inline bool zig_shlo_i32(int32_t lhs, int32_t rhs, int32_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_i32(lhs, bits) >= rhs) return false; + *res &= UINT32_MAX >> (32 - bits); + return true; +} + +static inline bool zig_shlo_i64(int64_t lhs, int64_t rhs, int64_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_i64(lhs, bits) >= rhs) return false; + *res &= UINT64_MAX >> (64 - bits); + return true; +} + +static inline bool zig_shlo_i128(int128_t lhs, int128_t rhs, int128_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_i128(lhs, bits) >= rhs) return false; + *res &= UINT128_MAX >> (128 - bits); + return true; +} + +static inline bool zig_shlo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_u8(lhs, bits) >= rhs) return false; + *res &= UINT8_MAX >> (8 - bits); + return true; +} + +static inline uint16_t zig_shlo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_u16(lhs, bits) >= rhs) return false; + *res &= UINT16_MAX >> (16 - bits); + return true; +} + +static 
inline uint32_t zig_shlo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_u32(lhs, bits) >= rhs) return false; + *res &= UINT32_MAX >> (32 - bits); + return true; +} + +static inline uint64_t zig_shlo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_u64(lhs, bits) >= rhs) return false; + *res &= UINT64_MAX >> (64 - bits); + return true; +} + +static inline uint128_t zig_shlo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint8_t bits) { + *res = lhs << rhs; + if (zig_clz_u128(lhs, bits) >= rhs) return false; + *res &= UINT128_MAX >> (128 - bits); + return true; +} + #define zig_sign_extend(T) \ static inline T zig_sign_extend_##T(T value, uint8_t zig_type_bit_width) { \ const T m = (T)1 << (T)(zig_type_bit_width - 1); \ From 852c82084163eec9911384b325dbd5713ee4df90 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 14 May 2022 21:24:48 +0200 Subject: [PATCH 19/22] aarch64: sub_with_overflow should always track V flag --- src/arch/aarch64/CodeGen.zig | 4 ++ test/behavior/math.zig | 85 +++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index e43cbca1c7..10730c446f 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -1901,6 +1901,10 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { } }; + if (tag == .sub_with_overflow) { + break :result MCValue{ .register_v_flag = dest.register }; + } + switch (int_info.signedness) { .unsigned => break :result MCValue{ .register_c_flag = dest.register }, .signed => break :result MCValue{ .register_v_flag = dest.register }, diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 011c714935..2f8cf06ee7 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -621,24 +621,41 @@ test "128-bit multiplication" { test "@addWithOverflow" { if (builtin.zig_backend == 
.stage2_c) return error.SkipZigTest; // TODO - var result: u8 = undefined; - try expect(@addWithOverflow(u8, 250, 100, &result)); - try expect(result == 94); - try expect(!@addWithOverflow(u8, 100, 150, &result)); - try expect(result == 250); + { + var result: u8 = undefined; + try expect(@addWithOverflow(u8, 250, 100, &result)); + try expect(result == 94); + try expect(!@addWithOverflow(u8, 100, 150, &result)); + try expect(result == 250); - var a: u8 = 200; - var b: u8 = 99; - try expect(@addWithOverflow(u8, a, b, &result)); - try expect(result == 43); - b = 55; - try expect(!@addWithOverflow(u8, a, b, &result)); - try expect(result == 255); + var a: u8 = 200; + var b: u8 = 99; + try expect(@addWithOverflow(u8, a, b, &result)); + try expect(result == 43); + b = 55; + try expect(!@addWithOverflow(u8, a, b, &result)); + try expect(result == 255); + } + + { + var a: usize = 6; + var b: usize = 6; + var res: usize = undefined; + try expect(!@addWithOverflow(usize, a, b, &res)); + try expect(res == 12); + } + + { + var a: isize = -6; + var b: isize = -6; + var res: isize = undefined; + try expect(!@addWithOverflow(isize, a, b, &res)); + try expect(res == -12); + } } test "small int addition" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO var x: u2 = 0; @@ -886,19 +903,37 @@ test "@mulWithOverflow bitsize > 32" { test "@subWithOverflow" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - var result: u8 = undefined; - try expect(@subWithOverflow(u8, 1, 2, &result)); - try expect(result == 255); - try expect(!@subWithOverflow(u8, 1, 1, &result)); - try expect(result == 0); + { + var result: u8 = undefined; + try expect(@subWithOverflow(u8, 1, 2, &result)); + try expect(result == 255); + try expect(!@subWithOverflow(u8, 1, 1, &result)); + try expect(result == 0); - var 
a: u8 = 1; - var b: u8 = 2; - try expect(@subWithOverflow(u8, a, b, &result)); - try expect(result == 255); - b = 1; - try expect(!@subWithOverflow(u8, a, b, &result)); - try expect(result == 0); + var a: u8 = 1; + var b: u8 = 2; + try expect(@subWithOverflow(u8, a, b, &result)); + try expect(result == 255); + b = 1; + try expect(!@subWithOverflow(u8, a, b, &result)); + try expect(result == 0); + } + + { + var a: usize = 6; + var b: usize = 6; + var res: usize = undefined; + try expect(!@subWithOverflow(usize, a, b, &res)); + try expect(res == 0); + } + + { + var a: isize = -6; + var b: isize = -6; + var res: isize = undefined; + try expect(!@subWithOverflow(isize, a, b, &res)); + try expect(res == 0); + } } test "@shlWithOverflow" { From 7f96ca101aec59e0c5508939c9ccc783d3898c2a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 14 May 2022 22:07:24 +0200 Subject: [PATCH 20/22] arm: sub_with_overflow should always track V flag --- src/arch/arm/CodeGen.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 02ca66f297..93e6e95ba9 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -1455,6 +1455,10 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { } }; + if (tag == .sub_with_overflow) { + break :result MCValue{ .register_v_flag = dest.register }; + } + switch (int_info.signedness) { .unsigned => break :result MCValue{ .register_c_flag = dest.register }, .signed => break :result MCValue{ .register_v_flag = dest.register }, From a0de0adb8e22222716d4d42b26490461ecd67de3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 14 May 2022 22:25:04 +0200 Subject: [PATCH 21/22] arm: disable recursive fibonacci --- test/cases/recursive_fibonacci.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cases/recursive_fibonacci.zig b/test/cases/recursive_fibonacci.zig index 4e284e3fc1..a2b8436dd7 100644 --- a/test/cases/recursive_fibonacci.zig +++ 
b/test/cases/recursive_fibonacci.zig @@ -20,5 +20,5 @@ fn assert(ok: bool) void { } // run -// target=arm-linux,x86_64-linux,x86_64-macos,wasm32-wasi +// target=x86_64-linux,x86_64-macos,wasm32-wasi // From f33b3fc3eae54b9d1159fc5a7a69a4b0e4aceca6 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 16 May 2022 14:30:28 -0700 Subject: [PATCH 22/22] zig.h: add casts for overflow arithmetic operations This avoids the following error: ``` error: incompatible pointer types passing 'int64_t *' (aka 'long long *') to parameter of type 'long *' overflow = __builtin_saddl_overflow(lhs, rhs, res); ^~~ ``` My previous understanding was that this error would not occur because prior to this line we check that int64_t is equivalent to long, like this: ```c #elif defined(__GNUC__) && INT64_MAX == LONG_MAX overflow = __builtin_saddl_overflow(lhs, rhs, res); ``` However, it appears that this still triggers a warning in C if int64_t is primarily aliased to `long long`, because `long` and `long long` are distinct types even when they have the same size and representation. --- src/link/C/zig.h | 162 +++++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/src/link/C/zig.h b/src/link/C/zig.h index e3a0447c05..43d9913039 100644 --- a/src/link/C/zig.h +++ b/src/link/C/zig.h @@ -415,15 +415,15 @@ static inline long long zig_subw_longlong(long long lhs, long long rhs, long lon static inline bool zig_addo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { #if defined(__GNUC__) && INT8_MAX == INT_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_sadd_overflow(lhs, rhs, res); + return __builtin_sadd_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT8_MAX == LONG_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_saddl_overflow(lhs, rhs, res); + return __builtin_saddl_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT8_MAX == LLONG_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_saddll_overflow(lhs, rhs, res); + return __builtin_saddll_overflow(lhs, rhs, (long long*)res); } #endif int16_t big_result = 
(int16_t)lhs + (int16_t)rhs; @@ -442,15 +442,15 @@ static inline bool zig_addo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, static inline bool zig_addo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { #if defined(__GNUC__) && INT16_MAX == INT_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_sadd_overflow(lhs, rhs, res); + return __builtin_sadd_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT16_MAX == LONG_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_saddl_overflow(lhs, rhs, res); + return __builtin_saddl_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT16_MAX == LLONG_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_saddll_overflow(lhs, rhs, res); + return __builtin_saddll_overflow(lhs, rhs, (long long*)res); } #endif int32_t big_result = (int32_t)lhs + (int32_t)rhs; @@ -469,15 +469,15 @@ static inline bool zig_addo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t static inline bool zig_addo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { #if defined(__GNUC__) && INT32_MAX == INT_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_sadd_overflow(lhs, rhs, res); + return __builtin_sadd_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT32_MAX == LONG_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_saddl_overflow(lhs, rhs, res); + return __builtin_saddl_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT32_MAX == LLONG_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_saddll_overflow(lhs, rhs, res); + return __builtin_saddll_overflow(lhs, rhs, (long long*)res); } #endif int64_t big_result = (int64_t)lhs + (int64_t)rhs; @@ -496,11 +496,11 @@ static inline bool zig_addo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t static inline bool zig_addo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { bool overflow; #if 
defined(__GNUC__) && INT64_MAX == INT_MAX - overflow = __builtin_sadd_overflow(lhs, rhs, res); + overflow = __builtin_sadd_overflow(lhs, rhs, (int*)res); #elif defined(__GNUC__) && INT64_MAX == LONG_MAX - overflow = __builtin_saddl_overflow(lhs, rhs, res); + overflow = __builtin_saddl_overflow(lhs, rhs, (long*)res); #elif defined(__GNUC__) && INT64_MAX == LLONG_MAX - overflow = __builtin_saddll_overflow(lhs, rhs, res); + overflow = __builtin_saddll_overflow(lhs, rhs, (long long*)res); #else int int_overflow; *res = __addodi4(lhs, rhs, &int_overflow); @@ -521,11 +521,11 @@ static inline bool zig_addo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t static inline bool zig_addo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { bool overflow; #if defined(__GNUC__) && INT128_MAX == INT_MAX - overflow = __builtin_sadd_overflow(lhs, rhs, res); + overflow = __builtin_sadd_overflow(lhs, rhs, (int*)res); #elif defined(__GNUC__) && INT128_MAX == LONG_MAX - overflow = __builtin_saddl_overflow(lhs, rhs, res); + overflow = __builtin_saddl_overflow(lhs, rhs, (long*)res); #elif defined(__GNUC__) && INT128_MAX == LLONG_MAX - overflow = __builtin_saddll_overflow(lhs, rhs, res); + overflow = __builtin_saddll_overflow(lhs, rhs, (long long*)res); #else int int_overflow; *res = __addoti4(lhs, rhs, &int_overflow); @@ -546,15 +546,15 @@ static inline bool zig_addo_i128(int128_t lhs, int128_t rhs, int128_t *res, int1 static inline bool zig_addo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { #if defined(__GNUC__) && UINT8_MAX == UINT_MAX if (max == UINT8_MAX) { - return __builtin_uadd_overflow(lhs, rhs, res); + return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); } #elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX if (max == UINT8_MAX) { - return __builtin_uaddl_overflow(lhs, rhs, res); + return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); } #elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX if (max == UINT8_MAX) { - return 
__builtin_uaddll_overflow(lhs, rhs, res); + return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); } #endif uint16_t big_result = (uint16_t)lhs + (uint16_t)rhs; @@ -569,15 +569,15 @@ static inline bool zig_addo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t m static inline uint16_t zig_addo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { #if defined(__GNUC__) && UINT16_MAX == UINT_MAX if (max == UINT16_MAX) { - return __builtin_uadd_overflow(lhs, rhs, res); + return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); } #elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX if (max == UINT16_MAX) { - return __builtin_uaddl_overflow(lhs, rhs, res); + return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); } #elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX if (max == UINT16_MAX) { - return __builtin_uaddll_overflow(lhs, rhs, res); + return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); } #endif uint32_t big_result = (uint32_t)lhs + (uint32_t)rhs; @@ -592,15 +592,15 @@ static inline uint16_t zig_addo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, u static inline uint32_t zig_addo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { #if defined(__GNUC__) && UINT32_MAX == UINT_MAX if (max == UINT32_MAX) { - return __builtin_uadd_overflow(lhs, rhs, res); + return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); } #elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX if (max == UINT32_MAX) { - return __builtin_uaddl_overflow(lhs, rhs, res); + return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); } #elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX if (max == UINT32_MAX) { - return __builtin_uaddll_overflow(lhs, rhs, res); + return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); } #endif uint64_t big_result = (uint64_t)lhs + (uint64_t)rhs; @@ -615,11 +615,11 @@ static inline uint32_t zig_addo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, u static inline uint64_t 
zig_addo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { bool overflow; #if defined(__GNUC__) && UINT64_MAX == UINT_MAX - overflow = __builtin_uadd_overflow(lhs, rhs, res); + overflow = __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); #elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX - overflow = __builtin_uaddl_overflow(lhs, rhs, res); + overflow = __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); #elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX - overflow = __builtin_uaddll_overflow(lhs, rhs, res); + overflow = __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); #else int int_overflow; *res = __uaddodi4(lhs, rhs, &int_overflow); @@ -645,15 +645,15 @@ static inline uint128_t zig_addo_u128(uint128_t lhs, uint128_t rhs, uint128_t *r static inline bool zig_subo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { #if defined(__GNUC__) && INT8_MAX == INT_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_ssub_overflow(lhs, rhs, res); + return __builtin_ssub_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT8_MAX == LONG_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_ssubl_overflow(lhs, rhs, res); + return __builtin_ssubl_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT8_MAX == LLONG_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_ssubll_overflow(lhs, rhs, res); + return __builtin_ssubll_overflow(lhs, rhs, (long long*)res); } #endif int16_t big_result = (int16_t)lhs - (int16_t)rhs; @@ -672,15 +672,15 @@ static inline bool zig_subo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, static inline bool zig_subo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { #if defined(__GNUC__) && INT16_MAX == INT_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_ssub_overflow(lhs, rhs, res); + return __builtin_ssub_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT16_MAX == LONG_MAX if (min == 
INT16_MIN && max == INT16_MAX) { - return __builtin_ssubl_overflow(lhs, rhs, res); + return __builtin_ssubl_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT16_MAX == LLONG_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_ssubll_overflow(lhs, rhs, res); + return __builtin_ssubll_overflow(lhs, rhs, (long long*)res); } #endif int32_t big_result = (int32_t)lhs - (int32_t)rhs; @@ -699,15 +699,15 @@ static inline bool zig_subo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t static inline bool zig_subo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { #if defined(__GNUC__) && INT32_MAX == INT_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_ssub_overflow(lhs, rhs, res); + return __builtin_ssub_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT32_MAX == LONG_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_ssubl_overflow(lhs, rhs, res); + return __builtin_ssubl_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT32_MAX == LLONG_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_ssubll_overflow(lhs, rhs, res); + return __builtin_ssubll_overflow(lhs, rhs, (long long*)res); } #endif int64_t big_result = (int64_t)lhs - (int64_t)rhs; @@ -726,11 +726,11 @@ static inline bool zig_subo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t static inline bool zig_subo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { bool overflow; #if defined(__GNUC__) && INT64_MAX == INT_MAX - overflow = __builtin_ssub_overflow(lhs, rhs, res); + overflow = __builtin_ssub_overflow(lhs, rhs, (int*)res); #elif defined(__GNUC__) && INT64_MAX == LONG_MAX - overflow = __builtin_ssubl_overflow(lhs, rhs, res); + overflow = __builtin_ssubl_overflow(lhs, rhs, (long*)res); #elif defined(__GNUC__) && INT64_MAX == LLONG_MAX - overflow = __builtin_ssubll_overflow(lhs, rhs, res); + overflow = __builtin_ssubll_overflow(lhs, rhs, (long long*)res); #else int 
int_overflow; *res = __subodi4(lhs, rhs, &int_overflow); @@ -751,11 +751,11 @@ static inline bool zig_subo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t static inline bool zig_subo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { bool overflow; #if defined(__GNUC__) && INT128_MAX == INT_MAX - overflow = __builtin_ssub_overflow(lhs, rhs, res); + overflow = __builtin_ssub_overflow(lhs, rhs, (int*)res); #elif defined(__GNUC__) && INT128_MAX == LONG_MAX - overflow = __builtin_ssubl_overflow(lhs, rhs, res); + overflow = __builtin_ssubl_overflow(lhs, rhs, (long*)res); #elif defined(__GNUC__) && INT128_MAX == LLONG_MAX - overflow = __builtin_ssubll_overflow(lhs, rhs, res); + overflow = __builtin_ssubll_overflow(lhs, rhs, (long long*)res); #else int int_overflow; *res = __suboti4(lhs, rhs, &int_overflow); @@ -775,11 +775,11 @@ static inline bool zig_subo_i128(int128_t lhs, int128_t rhs, int128_t *res, int1 static inline bool zig_subo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { #if defined(__GNUC__) && UINT8_MAX == UINT_MAX - return __builtin_usub_overflow(lhs, rhs, res); + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); #elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX - return __builtin_usubl_overflow(lhs, rhs, res); + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); #elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX - return __builtin_usubll_overflow(lhs, rhs, res); + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); #endif if (rhs > lhs) { *res = max - (rhs - lhs - 1); @@ -791,11 +791,11 @@ static inline bool zig_subo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t m static inline uint16_t zig_subo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { #if defined(__GNUC__) && UINT16_MAX == UINT_MAX - return __builtin_usub_overflow(lhs, rhs, res); + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); #elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX - 
return __builtin_usubl_overflow(lhs, rhs, res); + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); #elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX - return __builtin_usubll_overflow(lhs, rhs, res); + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); #endif if (rhs > lhs) { *res = max - (rhs - lhs - 1); @@ -808,11 +808,11 @@ static inline uint16_t zig_subo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, u static inline uint32_t zig_subo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { if (max == UINT32_MAX) { #if defined(__GNUC__) && UINT32_MAX == UINT_MAX - return __builtin_usub_overflow(lhs, rhs, res); + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); #elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX - return __builtin_usubl_overflow(lhs, rhs, res); + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); #elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX - return __builtin_usubll_overflow(lhs, rhs, res); + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); #endif int int_overflow; *res = __usubosi4(lhs, rhs, &int_overflow); @@ -830,11 +830,11 @@ static inline uint32_t zig_subo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, u static inline uint64_t zig_subo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { if (max == UINT64_MAX) { #if defined(__GNUC__) && UINT64_MAX == UINT_MAX - return __builtin_usub_overflow(lhs, rhs, res); + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); #elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX - return __builtin_usubl_overflow(lhs, rhs, res); + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); #elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX - return __builtin_usubll_overflow(lhs, rhs, res); + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); #else int int_overflow; *res = __usubodi4(lhs, rhs, &int_overflow); @@ -868,15 +868,15 @@ static inline uint128_t 
zig_subo_u128(uint128_t lhs, uint128_t rhs, uint128_t *r static inline bool zig_mulo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { #if defined(__GNUC__) && INT8_MAX == INT_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_smul_overflow(lhs, rhs, res); + return __builtin_smul_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT8_MAX == LONG_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_smull_overflow(lhs, rhs, res); + return __builtin_smull_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT8_MAX == LLONG_MAX if (min == INT8_MIN && max == INT8_MAX) { - return __builtin_smulll_overflow(lhs, rhs, res); + return __builtin_smulll_overflow(lhs, rhs, (long long*)res); } #endif int16_t big_result = (int16_t)lhs * (int16_t)rhs; @@ -895,15 +895,15 @@ static inline bool zig_mulo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, static inline bool zig_mulo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { #if defined(__GNUC__) && INT16_MAX == INT_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_smul_overflow(lhs, rhs, res); + return __builtin_smul_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT16_MAX == LONG_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_smull_overflow(lhs, rhs, res); + return __builtin_smull_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT16_MAX == LLONG_MAX if (min == INT16_MIN && max == INT16_MAX) { - return __builtin_smulll_overflow(lhs, rhs, res); + return __builtin_smulll_overflow(lhs, rhs, (long long*)res); } #endif int32_t big_result = (int32_t)lhs * (int32_t)rhs; @@ -922,15 +922,15 @@ static inline bool zig_mulo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t static inline bool zig_mulo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { #if defined(__GNUC__) && INT32_MAX == INT_MAX if (min == INT32_MIN && max == INT32_MAX) { - return 
__builtin_smul_overflow(lhs, rhs, res); + return __builtin_smul_overflow(lhs, rhs, (int*)res); } #elif defined(__GNUC__) && INT32_MAX == LONG_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_smull_overflow(lhs, rhs, res); + return __builtin_smull_overflow(lhs, rhs, (long*)res); } #elif defined(__GNUC__) && INT32_MAX == LLONG_MAX if (min == INT32_MIN && max == INT32_MAX) { - return __builtin_smulll_overflow(lhs, rhs, res); + return __builtin_smulll_overflow(lhs, rhs, (long long*)res); } #endif int64_t big_result = (int64_t)lhs * (int64_t)rhs; @@ -949,11 +949,11 @@ static inline bool zig_mulo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t static inline bool zig_mulo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { bool overflow; #if defined(__GNUC__) && INT64_MAX == INT_MAX - overflow = __builtin_smul_overflow(lhs, rhs, res); + overflow = __builtin_smul_overflow(lhs, rhs, (int*)res); #elif defined(__GNUC__) && INT64_MAX == LONG_MAX - overflow = __builtin_smull_overflow(lhs, rhs, res); + overflow = __builtin_smull_overflow(lhs, rhs, (long*)res); #elif defined(__GNUC__) && INT64_MAX == LLONG_MAX - overflow = __builtin_smulll_overflow(lhs, rhs, res); + overflow = __builtin_smulll_overflow(lhs, rhs, (long long*)res); #else int int_overflow; *res = __mulodi4(lhs, rhs, &int_overflow); @@ -974,11 +974,11 @@ static inline bool zig_mulo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t static inline bool zig_mulo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { bool overflow; #if defined(__GNUC__) && INT128_MAX == INT_MAX - overflow = __builtin_smul_overflow(lhs, rhs, res); + overflow = __builtin_smul_overflow(lhs, rhs, (int*)res); #elif defined(__GNUC__) && INT128_MAX == LONG_MAX - overflow = __builtin_smull_overflow(lhs, rhs, res); + overflow = __builtin_smull_overflow(lhs, rhs, (long*)res); #elif defined(__GNUC__) && INT128_MAX == LLONG_MAX - overflow = __builtin_smulll_overflow(lhs, rhs, res); + 
overflow = __builtin_smulll_overflow(lhs, rhs, (long long*)res); #else int int_overflow; *res = __muloti4(lhs, rhs, &int_overflow); @@ -999,15 +999,15 @@ static inline bool zig_mulo_i128(int128_t lhs, int128_t rhs, int128_t *res, int1 static inline bool zig_mulo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { #if defined(__GNUC__) && UINT8_MAX == UINT_MAX if (max == UINT8_MAX) { - return __builtin_umul_overflow(lhs, rhs, res); + return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res); } #elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX if (max == UINT8_MAX) { - return __builtin_umull_overflow(lhs, rhs, res); + return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res); } #elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX if (max == UINT8_MAX) { - return __builtin_umulll_overflow(lhs, rhs, res); + return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res); } #endif uint16_t big_result = (uint16_t)lhs * (uint16_t)rhs; @@ -1022,15 +1022,15 @@ static inline bool zig_mulo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t m static inline uint16_t zig_mulo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { #if defined(__GNUC__) && UINT16_MAX == UINT_MAX if (max == UINT16_MAX) { - return __builtin_umul_overflow(lhs, rhs, res); + return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res); } #elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX if (max == UINT16_MAX) { - return __builtin_umull_overflow(lhs, rhs, res); + return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res); } #elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX if (max == UINT16_MAX) { - return __builtin_umulll_overflow(lhs, rhs, res); + return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res); } #endif uint32_t big_result = (uint32_t)lhs * (uint32_t)rhs; @@ -1045,15 +1045,15 @@ static inline uint16_t zig_mulo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, u static inline uint32_t zig_mulo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t 
max) { #if defined(__GNUC__) && UINT32_MAX == UINT_MAX if (max == UINT32_MAX) { - return __builtin_umul_overflow(lhs, rhs, res); + return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res); } #elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX if (max == UINT32_MAX) { - return __builtin_umull_overflow(lhs, rhs, res); + return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res); } #elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX if (max == UINT32_MAX) { - return __builtin_umulll_overflow(lhs, rhs, res); + return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res); } #endif uint64_t big_result = (uint64_t)lhs * (uint64_t)rhs; @@ -1068,11 +1068,11 @@ static inline uint32_t zig_mulo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, u static inline uint64_t zig_mulo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { bool overflow; #if defined(__GNUC__) && UINT64_MAX == UINT_MAX - overflow = __builtin_umul_overflow(lhs, rhs, res); + overflow = __builtin_umul_overflow(lhs, rhs, (unsigned int*)res); #elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX - overflow = __builtin_umull_overflow(lhs, rhs, res); + overflow = __builtin_umull_overflow(lhs, rhs, (unsigned long*)res); #elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX - overflow = __builtin_umulll_overflow(lhs, rhs, res); + overflow = __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res); #else int int_overflow; *res = __umulodi4(lhs, rhs, &int_overflow);