Merge pull request #11316 from wsengir/stage2-overflow-safety

stage2: vectorized overflow arithmetic, integer overflow safety, left-shift overflow safety
This commit is contained in:
Andrew Kelley 2022-05-16 20:40:57 -04:00 committed by GitHub
commit 5888446c03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 1476 additions and 269 deletions

View File

@ -767,8 +767,7 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace) noreturn
// Until self-hosted catches up with stage1 language features, we have a simpler
// default panic function:
if ((builtin.zig_backend == .stage2_llvm and builtin.link_libc) or
builtin.zig_backend == .stage2_c or
if (builtin.zig_backend == .stage2_c or
builtin.zig_backend == .stage2_wasm or
builtin.zig_backend == .stage2_arm or
builtin.zig_backend == .stage2_aarch64 or

View File

@ -1574,6 +1574,12 @@ fn failWithErrorSetCodeMissing(
});
}
/// Reports a compile error for a comptime arithmetic result `val` that overflowed
/// the integer type `int_ty`. The offending value is formatted as a `comptime_int`
/// so the full (out-of-range) value is shown rather than a truncated one.
fn failWithIntegerOverflow(sema: *Sema, block: *Block, src: LazySrcLoc, int_ty: Type, val: Value) CompileError {
return sema.fail(block, src, "overflow of integer type '{}' with value '{}'", .{
int_ty.fmt(sema.mod), val.fmtValue(Type.@"comptime_int", sema.mod),
});
}
/// We don't return a pointer to the new error note because the pointer
/// becomes invalid when you add another one.
fn errNote(
@ -8820,8 +8826,6 @@ fn zirShl(
return sema.addConstant(lhs_ty, val);
} else lhs_src;
// TODO: insert runtime safety check for shl_exact
const new_rhs = if (air_tag == .shl_sat) rhs: {
// Limit the RHS type for saturating shl to be an integer as small as the LHS.
if (rhs_is_comptime_int or
@ -8839,6 +8843,41 @@ fn zirShl(
} else rhs;
try sema.requireRuntimeBlock(block, runtime_src);
if (block.wantSafety()) {
const maybe_op_ov: ?Air.Inst.Tag = switch (air_tag) {
.shl_exact => .shl_with_overflow,
else => null,
};
if (maybe_op_ov) |op_ov_tag| {
const op_ov_tuple_ty = try sema.overflowArithmeticTupleType(lhs_ty);
const op_ov = try block.addInst(.{
.tag = op_ov_tag,
.data = .{ .ty_pl = .{
.ty = try sema.addType(op_ov_tuple_ty),
.payload = try sema.addExtra(Air.Bin{
.lhs = lhs,
.rhs = rhs,
}),
} },
});
const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty);
const any_ov_bit = if (lhs_ty.zigTypeTag() == .Vector)
try block.addInst(.{
.tag = .reduce,
.data = .{ .reduce = .{
.operand = ov_bit,
.operation = .Or,
} },
})
else
ov_bit;
const zero_ov = try sema.addConstant(Type.@"u1", Value.zero);
const no_ov = try block.addBinOp(.cmp_eq, any_ov_bit, zero_ov);
try sema.addSafetyCheck(block, no_ov, .shl_overflow);
return sema.tupleFieldValByIndex(block, src, op_ov, 0, op_ov_tuple_ty);
}
}
return block.addBinOp(air_tag, lhs, new_rhs);
}
@ -9417,32 +9456,29 @@ fn zirOverflowArithmetic(
const ptr = sema.resolveInst(extra.ptr);
const lhs_ty = sema.typeOf(lhs);
const rhs_ty = sema.typeOf(rhs);
const mod = sema.mod;
const target = mod.getTarget();
// Note, the types of lhs/rhs (also for shifting)/ptr are already correct as ensured by astgen.
try sema.checkVectorizableBinaryOperands(block, src, lhs_ty, rhs_ty, lhs_src, rhs_src);
const dest_ty = lhs_ty;
if (dest_ty.zigTypeTag() != .Int) {
return sema.fail(block, src, "expected integer type, found '{}'", .{dest_ty.fmt(mod)});
if (dest_ty.scalarType().zigTypeTag() != .Int) {
return sema.fail(block, src, "expected vector of integers or integer type, found '{}'", .{dest_ty.fmt(mod)});
}
const maybe_lhs_val = try sema.resolveMaybeUndefVal(block, lhs_src, lhs);
const maybe_rhs_val = try sema.resolveMaybeUndefVal(block, rhs_src, rhs);
const types = try sema.arena.alloc(Type, 2);
const values = try sema.arena.alloc(Value, 2);
const tuple_ty = try Type.Tag.tuple.create(sema.arena, .{
.types = types,
.values = values,
});
types[0] = dest_ty;
types[1] = Type.initTag(.u1);
values[0] = Value.initTag(.unreachable_value);
values[1] = Value.initTag(.unreachable_value);
const tuple_ty = try sema.overflowArithmeticTupleType(dest_ty);
const ov_ty = tuple_ty.tupleFields().types[1];
// TODO: Remove and use `ov_ty` instead.
// This is a temporary type used until overflow arithmetic properly returns `u1` instead of `bool`.
const overflowed_ty = if (dest_ty.zigTypeTag() == .Vector) try Type.vector(sema.arena, dest_ty.vectorLen(), Type.@"bool") else Type.@"bool";
const result: struct {
overflowed: enum { yes, no, undef },
/// TODO: Rename to `overflow_bit` and make of type `u1`.
overflowed: Air.Inst.Ref,
wrapped: Air.Inst.Ref,
} = result: {
switch (zir_tag) {
@ -9452,23 +9488,24 @@ fn zirOverflowArithmetic(
// Otherwise, if either of the arguments is undefined, undefined is returned.
if (maybe_lhs_val) |lhs_val| {
if (!lhs_val.isUndef() and lhs_val.compareWithZero(.eq)) {
break :result .{ .overflowed = .no, .wrapped = rhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs };
}
}
if (maybe_rhs_val) |rhs_val| {
if (!rhs_val.isUndef() and rhs_val.compareWithZero(.eq)) {
break :result .{ .overflowed = .no, .wrapped = lhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs };
}
}
if (maybe_lhs_val) |lhs_val| {
if (maybe_rhs_val) |rhs_val| {
if (lhs_val.isUndef() or rhs_val.isUndef()) {
break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) };
break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) };
}
const result = try lhs_val.intAddWithOverflow(rhs_val, dest_ty, sema.arena, target);
const inst = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst };
const overflowed = try sema.addConstant(overflowed_ty, result.overflowed);
const wrapped = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = overflowed, .wrapped = wrapped };
}
}
},
@ -9477,17 +9514,18 @@ fn zirOverflowArithmetic(
// Otherwise, if either result is undefined, both results are undefined.
if (maybe_rhs_val) |rhs_val| {
if (rhs_val.isUndef()) {
break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) };
break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) };
} else if (rhs_val.compareWithZero(.eq)) {
break :result .{ .overflowed = .no, .wrapped = lhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs };
} else if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) {
break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) };
break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) };
}
const result = try lhs_val.intSubWithOverflow(rhs_val, dest_ty, sema.arena, target);
const inst = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst };
const overflowed = try sema.addConstant(overflowed_ty, result.overflowed);
const wrapped = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = overflowed, .wrapped = wrapped };
}
}
},
@ -9498,9 +9536,9 @@ fn zirOverflowArithmetic(
if (maybe_lhs_val) |lhs_val| {
if (!lhs_val.isUndef()) {
if (lhs_val.compareWithZero(.eq)) {
break :result .{ .overflowed = .no, .wrapped = lhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs };
} else if (lhs_val.compare(.eq, Value.one, dest_ty, mod)) {
break :result .{ .overflowed = .no, .wrapped = rhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs };
}
}
}
@ -9508,9 +9546,9 @@ fn zirOverflowArithmetic(
if (maybe_rhs_val) |rhs_val| {
if (!rhs_val.isUndef()) {
if (rhs_val.compareWithZero(.eq)) {
break :result .{ .overflowed = .no, .wrapped = rhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs };
} else if (rhs_val.compare(.eq, Value.one, dest_ty, mod)) {
break :result .{ .overflowed = .no, .wrapped = lhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs };
}
}
}
@ -9518,12 +9556,13 @@ fn zirOverflowArithmetic(
if (maybe_lhs_val) |lhs_val| {
if (maybe_rhs_val) |rhs_val| {
if (lhs_val.isUndef() or rhs_val.isUndef()) {
break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) };
break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) };
}
const result = try lhs_val.intMulWithOverflow(rhs_val, dest_ty, sema.arena, target);
const inst = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst };
const overflowed = try sema.addConstant(overflowed_ty, result.overflowed);
const wrapped = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = overflowed, .wrapped = wrapped };
}
}
},
@ -9533,23 +9572,24 @@ fn zirOverflowArithmetic(
// Otherwise, if either of the arguments is undefined, both results are undefined.
if (maybe_lhs_val) |lhs_val| {
if (!lhs_val.isUndef() and lhs_val.compareWithZero(.eq)) {
break :result .{ .overflowed = .no, .wrapped = lhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs };
}
}
if (maybe_rhs_val) |rhs_val| {
if (!rhs_val.isUndef() and rhs_val.compareWithZero(.eq)) {
break :result .{ .overflowed = .no, .wrapped = lhs };
break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs };
}
}
if (maybe_lhs_val) |lhs_val| {
if (maybe_rhs_val) |rhs_val| {
if (lhs_val.isUndef() or rhs_val.isUndef()) {
break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) };
break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) };
}
const result = try lhs_val.shlWithOverflow(rhs_val, dest_ty, sema.arena, target);
const inst = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst };
const overflowed = try sema.addConstant(overflowed_ty, result.overflowed);
const wrapped = try sema.addConstant(dest_ty, result.wrapped_result);
break :result .{ .overflowed = overflowed, .wrapped = wrapped };
}
}
},
@ -9577,21 +9617,40 @@ fn zirOverflowArithmetic(
} },
});
const wrapped = try block.addStructFieldVal(tuple, 0, dest_ty);
const wrapped = try sema.tupleFieldValByIndex(block, src, tuple, 0, tuple_ty);
try sema.storePtr2(block, src, ptr, ptr_src, wrapped, src, .store);
const overflow_bit = try block.addStructFieldVal(tuple, 1, Type.initTag(.u1));
const zero_u1 = try sema.addConstant(Type.initTag(.u1), Value.zero);
return try block.addBinOp(.cmp_neq, overflow_bit, zero_u1);
const overflow_bit = try sema.tupleFieldValByIndex(block, src, tuple, 1, tuple_ty);
const zero_ov_val = if (dest_ty.zigTypeTag() == .Vector) try Value.Tag.repeated.create(sema.arena, Value.zero) else Value.zero;
const zero_ov = try sema.addConstant(ov_ty, zero_ov_val);
const overflowed_inst = if (dest_ty.zigTypeTag() == .Vector)
block.addCmpVector(overflow_bit, .zero, .neq, try sema.addType(ov_ty))
else
block.addBinOp(.cmp_neq, overflow_bit, zero_ov);
return overflowed_inst;
};
try sema.storePtr2(block, src, ptr, ptr_src, result.wrapped, src, .store);
return result.overflowed;
}
return switch (result.overflowed) {
.yes => Air.Inst.Ref.bool_true,
.no => Air.Inst.Ref.bool_false,
.undef => try sema.addConstUndef(Type.bool),
};
/// Builds the anonymous tuple type `{ ty, u1 }` — or `{ ty, @Vector(n, u1) }` when
/// `ty` is a vector — that the `*_with_overflow` AIR instructions produce.
/// Both field values are set to `unreachable_value`, marking them runtime-known.
fn overflowArithmeticTupleType(sema: *Sema, ty: Type) !Type {
    // The overflow bit mirrors the operand's shape: a scalar u1, or one u1 per lane.
    const ov_ty = if (ty.zigTypeTag() == .Vector)
        try Type.vector(sema.arena, ty.vectorLen(), Type.@"u1")
    else
        Type.@"u1";

    const types = try sema.arena.alloc(Type, 2);
    const values = try sema.arena.alloc(Value, 2);
    types[0] = ty;
    types[1] = ov_ty;
    values[0] = Value.initTag(.unreachable_value);
    values[1] = Value.initTag(.unreachable_value);

    return Type.Tag.tuple.create(sema.arena, .{
        .types = types,
        .values = values,
    });
}
fn analyzeArithmetic(
@ -9691,10 +9750,11 @@ fn analyzeArithmetic(
}
if (maybe_rhs_val) |rhs_val| {
if (is_int) {
return sema.addConstant(
resolved_type,
try lhs_val.intAdd(rhs_val, resolved_type, sema.arena, target),
);
const sum = try lhs_val.intAdd(rhs_val, resolved_type, sema.arena, target);
if (!sum.intFitsInType(resolved_type, target)) {
return sema.failWithIntegerOverflow(block, src, resolved_type, sum);
}
return sema.addConstant(resolved_type, sum);
} else {
return sema.addConstant(
resolved_type,
@ -9784,10 +9844,11 @@ fn analyzeArithmetic(
}
if (maybe_rhs_val) |rhs_val| {
if (is_int) {
return sema.addConstant(
resolved_type,
try lhs_val.intSub(rhs_val, resolved_type, sema.arena, target),
);
const diff = try lhs_val.intSub(rhs_val, resolved_type, sema.arena, target);
if (!diff.intFitsInType(resolved_type, target)) {
return sema.failWithIntegerOverflow(block, src, resolved_type, diff);
}
return sema.addConstant(resolved_type, diff);
} else {
return sema.addConstant(
resolved_type,
@ -10157,10 +10218,11 @@ fn analyzeArithmetic(
}
}
if (is_int) {
return sema.addConstant(
resolved_type,
try lhs_val.intMul(rhs_val, resolved_type, sema.arena, target),
);
const product = try lhs_val.intMul(rhs_val, resolved_type, sema.arena, target);
if (!product.intFitsInType(resolved_type, target)) {
return sema.failWithIntegerOverflow(block, src, resolved_type, product);
}
return sema.addConstant(resolved_type, product);
} else {
return sema.addConstant(
resolved_type,
@ -10448,6 +10510,45 @@ fn analyzeArithmetic(
};
try sema.requireRuntimeBlock(block, rs.src);
if (block.wantSafety()) {
if (scalar_tag == .Int) {
const maybe_op_ov: ?Air.Inst.Tag = switch (rs.air_tag) {
.add => .add_with_overflow,
.sub => .sub_with_overflow,
.mul => .mul_with_overflow,
else => null,
};
if (maybe_op_ov) |op_ov_tag| {
const op_ov_tuple_ty = try sema.overflowArithmeticTupleType(resolved_type);
const op_ov = try block.addInst(.{
.tag = op_ov_tag,
.data = .{ .ty_pl = .{
.ty = try sema.addType(op_ov_tuple_ty),
.payload = try sema.addExtra(Air.Bin{
.lhs = casted_lhs,
.rhs = casted_rhs,
}),
} },
});
const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty);
const any_ov_bit = if (resolved_type.zigTypeTag() == .Vector)
try block.addInst(.{
.tag = .reduce,
.data = .{ .reduce = .{
.operand = ov_bit,
.operation = .Or,
} },
})
else
ov_bit;
const zero_ov = try sema.addConstant(Type.@"u1", Value.zero);
const no_ov = try block.addBinOp(.cmp_eq, any_ov_bit, zero_ov);
try sema.addSafetyCheck(block, no_ov, .integer_overflow);
return sema.tupleFieldValByIndex(block, src, op_ov, 0, op_ov_tuple_ty);
}
}
}
return block.addBinOp(rs.air_tag, casted_lhs, casted_rhs);
}
@ -16682,6 +16783,8 @@ pub const PanicId = enum {
invalid_error_code,
index_out_of_bounds,
cast_truncated_data,
integer_overflow,
shl_overflow,
};
fn addSafetyCheck(
@ -16805,6 +16908,8 @@ fn safetyPanic(
.invalid_error_code => "invalid error code",
.index_out_of_bounds => "attempt to index out of bounds",
.cast_truncated_data => "integer cast truncated bits",
.integer_overflow => "integer overflow",
.shl_overflow => "left shift overflowed bits",
};
const msg_inst = msg_inst: {
@ -23093,6 +23198,14 @@ fn addIntUnsigned(sema: *Sema, ty: Type, int: u64) CompileError!Air.Inst.Ref {
return sema.addConstant(ty, try Value.Tag.int_u64.create(sema.arena, int));
}
/// Materializes a comptime boolean constant of type `ty`, which must be `bool`
/// or a vector of `bool`; for a vector the value is splatted into every lane.
fn addBool(sema: *Sema, ty: Type, boolean: bool) CompileError!Air.Inst.Ref {
    switch (ty.zigTypeTag()) {
        .Bool => return sema.resolveInst(if (boolean) .bool_true else .bool_false),
        .Vector => {
            const splat = try Value.Tag.repeated.create(sema.arena, Value.makeBool(boolean));
            return sema.addConstant(ty, splat);
        },
        else => unreachable,
    }
}
/// Materializes an `undefined` constant of type `ty` as an AIR reference.
fn addConstUndef(sema: *Sema, ty: Type) CompileError!Air.Inst.Ref {
return sema.addConstant(ty, Value.undef);
}

View File

@ -1901,6 +1901,10 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void {
}
};
if (tag == .sub_with_overflow) {
break :result MCValue{ .register_v_flag = dest.register };
}
switch (int_info.signedness) {
.unsigned => break :result MCValue{ .register_c_flag = dest.register },
.signed => break :result MCValue{ .register_v_flag = dest.register },

View File

@ -1455,6 +1455,10 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void {
}
};
if (tag == .sub_with_overflow) {
break :result MCValue{ .register_v_flag = dest.register };
}
switch (int_info.signedness) {
.unsigned => break :result MCValue{ .register_c_flag = dest.register },
.signed => break :result MCValue{ .register_v_flag = dest.register },

View File

@ -1450,9 +1450,9 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
.min => self.airMaxMin(inst, .min),
.mul_add => self.airMulAdd(inst),
.add_with_overflow => self.airBinOpOverflow(inst, .add),
.sub_with_overflow => self.airBinOpOverflow(inst, .sub),
.shl_with_overflow => self.airBinOpOverflow(inst, .shl),
.add_with_overflow => self.airAddSubWithOverflow(inst, .add),
.sub_with_overflow => self.airAddSubWithOverflow(inst, .sub),
.shl_with_overflow => self.airShlWithOverflow(inst),
.mul_with_overflow => self.airMulWithOverflow(inst),
.clz => self.airClz(inst),
@ -3941,9 +3941,76 @@ fn airPtrSliceFieldPtr(self: *Self, inst: Air.Inst.Index, offset: u32) InnerErro
return self.buildPointerOffset(slice_ptr, offset, .new);
}
fn airBinOpOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue {
if (self.liveness.isUnused(inst)) return WValue{ .none = {} };
/// Lowers the AIR `add_with_overflow` / `sub_with_overflow` instructions for the
/// wasm backend. Computes the wrapped result and an overflow bit, stores both
/// into a fresh stack allocation (wrapped value at offset 0, u1 overflow bit at
/// `abiSize(lhs_ty)`), and returns a pointer to that allocation.
fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue {
assert(op == .add or op == .sub);
const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
const lhs_op = try self.resolveInst(extra.lhs);
const rhs_op = try self.resolveInst(extra.rhs);
const lhs_ty = self.air.typeOf(extra.lhs);
if (lhs_ty.zigTypeTag() == .Vector) {
return self.fail("TODO: Implement overflow arithmetic for vectors", .{});
}
// Only integer widths that map onto a native wasm value type (32/64 bit) are supported.
const int_info = lhs_ty.intInfo(self.target);
const is_signed = int_info.signedness == .signed;
const wasm_bits = toWasmBits(int_info.bits) orelse {
return self.fail("TODO: Implement {{add/sub}}_with_overflow for integer bitsize: {d}", .{int_info.bits});
};
const zero = switch (wasm_bits) {
32 => WValue{ .imm32 = 0 },
64 => WValue{ .imm64 = 0 },
else => unreachable,
};
// Shift amount used to sign-extend a sub-word integer within its wasm word.
const shift_amt = wasm_bits - int_info.bits;
const shift_val = switch (wasm_bits) {
32 => WValue{ .imm32 = shift_amt },
64 => WValue{ .imm64 = shift_amt },
else => unreachable,
};
// for signed integers, we first apply signed shifts by the difference in bits
// to get the signed value, as we store it internally as 2's complement.
const lhs = if (wasm_bits != int_info.bits and is_signed) blk: {
const shl = try self.binOp(lhs_op, shift_val, lhs_ty, .shl);
break :blk try self.binOp(shl, shift_val, lhs_ty, .shr);
} else lhs_op;
const rhs = if (wasm_bits != int_info.bits and is_signed) blk: {
const shl = try self.binOp(rhs_op, shift_val, lhs_ty, .shl);
break :blk try self.binOp(shl, shift_val, lhs_ty, .shr);
} else rhs_op;
// Raw full-width operation; `result` is the value wrapped back to the Zig bit width.
const bin_op = try self.binOp(lhs, rhs, lhs_ty, op);
const result = if (wasm_bits != int_info.bits) blk: {
break :blk try self.wrapOperand(bin_op, lhs_ty);
} else bin_op;
const cmp_op: std.math.CompareOperator = if (op == .sub) .gt else .lt;
const overflow_bit: WValue = if (is_signed) blk: {
if (wasm_bits == int_info.bits) {
// Full-width signed overflow: result moved the "wrong way" relative to rhs's sign.
const cmp_zero = try self.cmp(rhs, zero, lhs_ty, cmp_op);
const lt = try self.cmp(bin_op, lhs, lhs_ty, .lt);
break :blk try self.binOp(cmp_zero, lt, Type.u32, .xor); // result of cmp_zero and lt is always 32bit
}
// Sub-word signed: overflow iff sign-extending the wrapped value changes it.
const shl = try self.binOp(bin_op, shift_val, lhs_ty, .shl);
const shr = try self.binOp(shl, shift_val, lhs_ty, .shr);
break :blk try self.cmp(shr, bin_op, lhs_ty, .neq);
} else if (wasm_bits == int_info.bits)
// Full-width unsigned: add overflowed iff result < lhs; sub iff result > lhs.
try self.cmp(bin_op, lhs, lhs_ty, cmp_op)
else
// Sub-word unsigned: overflow iff wrapping changed the raw result.
try self.cmp(bin_op, result, lhs_ty, .neq);
// Store the tuple { wrapped, overflow_bit } in a stack slot and return its address.
const result_ptr = try self.allocStack(self.air.typeOfIndex(inst));
try self.store(result_ptr, result, lhs_ty, 0);
const offset = @intCast(u32, lhs_ty.abiSize(self.target));
try self.store(result_ptr, overflow_bit, Type.initTag(.u1), offset);
return result_ptr;
}
fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
const lhs = try self.resolveInst(extra.lhs);
@ -3954,96 +4021,36 @@ fn airBinOpOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue
return self.fail("TODO: Implement overflow arithmetic for vectors", .{});
}
// We store the bit if it's overflowed or not in this. As it's zero-initialized
// we only need to update it if an overflow (or underflow) occured.
const overflow_bit = try self.allocLocal(Type.initTag(.u1));
const int_info = lhs_ty.intInfo(self.target);
const is_signed = int_info.signedness == .signed;
const wasm_bits = toWasmBits(int_info.bits) orelse {
return self.fail("TODO: Implement overflow arithmetic for integer bitsize: {d}", .{int_info.bits});
return self.fail("TODO: Implement shl_with_overflow for integer bitsize: {d}", .{int_info.bits});
};
const zero = switch (wasm_bits) {
32 => WValue{ .imm32 = 0 },
64 => WValue{ .imm64 = 0 },
else => unreachable,
};
const int_max = (@as(u65, 1) << @intCast(u7, int_info.bits - @boolToInt(int_info.signedness == .signed))) - 1;
const int_max_wvalue = switch (wasm_bits) {
32 => WValue{ .imm32 = @intCast(u32, int_max) },
64 => WValue{ .imm64 = @intCast(u64, int_max) },
else => unreachable,
};
const int_min = if (int_info.signedness == .unsigned)
@as(i64, 0)
else
-@as(i64, 1) << @intCast(u6, int_info.bits - 1);
const int_min_wvalue = switch (wasm_bits) {
32 => WValue{ .imm32 = @bitCast(u32, @intCast(i32, int_min)) },
64 => WValue{ .imm64 = @bitCast(u64, int_min) },
else => unreachable,
};
const shl = try self.binOp(lhs, rhs, lhs_ty, .shl);
const result = if (wasm_bits != int_info.bits) blk: {
break :blk try self.wrapOperand(shl, lhs_ty);
} else shl;
if (int_info.signedness == .unsigned and op == .add) {
const diff = try self.binOp(int_max_wvalue, lhs, lhs_ty, .sub);
const cmp_res = try self.cmp(rhs, diff, lhs_ty, .gt);
try self.emitWValue(cmp_res);
try self.addLabel(.local_set, overflow_bit.local);
} else if (int_info.signedness == .unsigned and op == .sub) {
const cmp_res = try self.cmp(lhs, rhs, lhs_ty, .lt);
try self.emitWValue(cmp_res);
try self.addLabel(.local_set, overflow_bit.local);
} else if (int_info.signedness == .signed and op != .shl) {
// for overflow, we first check if lhs is > 0 (or lhs < 0 in case of subtraction). If not, we will not overflow.
// We first create an outer block, where we handle overflow.
// Then we create an inner block, where underflow is handled.
try self.startBlock(.block, wasm.block_empty);
try self.startBlock(.block, wasm.block_empty);
{
try self.emitWValue(lhs);
const cmp_result = try self.cmp(lhs, zero, lhs_ty, .lt);
try self.emitWValue(cmp_result);
}
try self.addLabel(.br_if, 0); // break to outer block, and handle underflow
// handle overflow
{
const diff = try self.binOp(int_max_wvalue, lhs, lhs_ty, .sub);
const cmp_res = try self.cmp(rhs, diff, lhs_ty, if (op == .add) .gt else .lt);
try self.emitWValue(cmp_res);
try self.addLabel(.local_set, overflow_bit.local);
}
try self.addLabel(.br, 1); // break from blocks, and continue regular flow.
try self.endBlock();
// handle underflow
{
const diff = try self.binOp(int_min_wvalue, lhs, lhs_ty, .sub);
const cmp_res = try self.cmp(rhs, diff, lhs_ty, if (op == .add) .lt else .gt);
try self.emitWValue(cmp_res);
try self.addLabel(.local_set, overflow_bit.local);
}
try self.endBlock();
}
const bin_op = if (op == .shl) blk: {
const tmp_val = try self.binOp(lhs, rhs, lhs_ty, op);
const cmp_res = try self.cmp(tmp_val, int_max_wvalue, lhs_ty, .gt);
try self.emitWValue(cmp_res);
try self.addLabel(.local_set, overflow_bit.local);
try self.emitWValue(tmp_val);
try self.emitWValue(int_max_wvalue);
switch (wasm_bits) {
32 => try self.addTag(.i32_and),
64 => try self.addTag(.i64_and),
const overflow_bit = if (wasm_bits != int_info.bits and is_signed) blk: {
const shift_amt = wasm_bits - int_info.bits;
const shift_val = switch (wasm_bits) {
32 => WValue{ .imm32 = shift_amt },
64 => WValue{ .imm64 = shift_amt },
else => unreachable,
}
try self.addLabel(.local_set, tmp_val.local);
break :blk tmp_val;
} else try self.wrapBinOp(lhs, rhs, lhs_ty, op);
};
const secondary_shl = try self.binOp(shl, shift_val, lhs_ty, .shl);
const initial_shr = try self.binOp(secondary_shl, shift_val, lhs_ty, .shr);
const shr = try self.wrapBinOp(initial_shr, rhs, lhs_ty, .shr);
break :blk try self.cmp(lhs, shr, lhs_ty, .neq);
} else blk: {
const shr = try self.binOp(result, rhs, lhs_ty, .shr);
break :blk try self.cmp(lhs, shr, lhs_ty, .neq);
};
const result_ptr = try self.allocStack(self.air.typeOfIndex(inst));
try self.store(result_ptr, bin_op, lhs_ty, 0);
try self.store(result_ptr, result, lhs_ty, 0);
const offset = @intCast(u32, lhs_ty.abiSize(self.target));
try self.store(result_ptr, overflow_bit, Type.initTag(.u1), offset);

View File

@ -1896,7 +1896,7 @@ fn lowerToMrEnc(
const opc = getOpCode(tag, .mr, reg.size() == 8 or reg_or_mem.size() == 8).?;
switch (reg_or_mem) {
.register => |dst_reg| {
const encoder = try Encoder.init(code, 3);
const encoder = try Encoder.init(code, 4);
if (dst_reg.size() == 16) {
encoder.prefix16BitMode();
}

View File

@ -1766,10 +1766,10 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
.mul_add => try airMulAdd(f, inst),
.add_with_overflow => try airAddWithOverflow(f, inst),
.sub_with_overflow => try airSubWithOverflow(f, inst),
.mul_with_overflow => try airMulWithOverflow(f, inst),
.shl_with_overflow => try airShlWithOverflow(f, inst),
.add_with_overflow => try airOverflow(f, inst, "addo_"),
.sub_with_overflow => try airOverflow(f, inst, "subo_"),
.mul_with_overflow => try airOverflow(f, inst, "mulo_"),
.shl_with_overflow => try airOverflow(f, inst, "shlo_"),
.min => try airMinMax(f, inst, "<"),
.max => try airMinMax(f, inst, ">"),
@ -2295,7 +2295,8 @@ fn airWrapOp(
const bin_op = f.air.instructions.items(.data)[inst].bin_op;
const inst_ty = f.air.typeOfIndex(inst);
const int_info = inst_ty.intInfo(f.object.dg.module.getTarget());
const target = f.object.dg.module.getTarget();
const int_info = inst_ty.intInfo(target);
const bits = int_info.bits;
// if it's an unsigned int with non-arbitrary bit size then we can just add
@ -2313,47 +2314,8 @@ fn airWrapOp(
return f.fail("TODO: C backend: airWrapOp for large integers", .{});
}
var min_buf: [80]u8 = undefined;
const min = switch (int_info.signedness) {
.unsigned => "0",
else => switch (inst_ty.tag()) {
.c_short => "SHRT_MIN",
.c_int => "INT_MIN",
.c_long => "LONG_MIN",
.c_longlong => "LLONG_MIN",
.isize => "INTPTR_MIN",
else => blk: {
const val = -1 * std.math.pow(i64, 2, @intCast(i64, bits - 1));
break :blk std.fmt.bufPrint(&min_buf, "{d}", .{val}) catch |err| switch (err) {
error.NoSpaceLeft => unreachable,
};
},
},
};
var max_buf: [80]u8 = undefined;
const max = switch (inst_ty.tag()) {
.c_short => "SHRT_MAX",
.c_ushort => "USHRT_MAX",
.c_int => "INT_MAX",
.c_uint => "UINT_MAX",
.c_long => "LONG_MAX",
.c_ulong => "ULONG_MAX",
.c_longlong => "LLONG_MAX",
.c_ulonglong => "ULLONG_MAX",
.isize => "INTPTR_MAX",
.usize => "UINTPTR_MAX",
else => blk: {
const pow_bits = switch (int_info.signedness) {
.signed => bits - 1,
.unsigned => bits,
};
const val = std.math.pow(u64, 2, pow_bits) - 1;
break :blk std.fmt.bufPrint(&max_buf, "{}", .{val}) catch |err| switch (err) {
error.NoSpaceLeft => unreachable,
};
},
};
const max = intMax(inst_ty, target, &max_buf);
const lhs = try f.resolveInst(bin_op.lhs);
const rhs = try f.resolveInst(bin_op.rhs);
@ -2369,10 +2331,7 @@ fn airWrapOp(
.c_long => try w.writeAll("long"),
.c_longlong => try w.writeAll("longlong"),
else => {
const prefix_byte: u8 = switch (int_info.signedness) {
.signed => 'i',
.unsigned => 'u',
};
const prefix_byte: u8 = signAbbrev(int_info.signedness);
for ([_]u8{ 8, 16, 32, 64 }) |nbits| {
if (bits <= nbits) {
try w.print("{c}{d}", .{ prefix_byte, nbits });
@ -2390,6 +2349,9 @@ fn airWrapOp(
try f.writeCValue(w, rhs);
if (int_info.signedness == .signed) {
var min_buf: [80]u8 = undefined;
const min = intMin(inst_ty, target, &min_buf);
try w.print(", {s}", .{min});
}
@ -2475,10 +2437,7 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue {
.c_long => try w.writeAll("long"),
.c_longlong => try w.writeAll("longlong"),
else => {
const prefix_byte: u8 = switch (int_info.signedness) {
.signed => 'i',
.unsigned => 'u',
};
const prefix_byte: u8 = signAbbrev(int_info.signedness);
for ([_]u8{ 8, 16, 32, 64 }) |nbits| {
if (bits <= nbits) {
try w.print("{c}{d}", .{ prefix_byte, nbits });
@ -2505,28 +2464,63 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue {
return ret;
}
fn airAddWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
_ = f;
_ = inst;
return f.fail("TODO add with overflow", .{});
}
fn airOverflow(f: *Function, inst: Air.Inst.Index, op_abbrev: [*:0]const u8) !CValue {
if (f.liveness.isUnused(inst))
return CValue.none;
fn airSubWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
_ = f;
_ = inst;
return f.fail("TODO sub with overflow", .{});
}
const ty_pl = f.air.instructions.items(.data)[inst].ty_pl;
const bin_op = f.air.extraData(Air.Bin, ty_pl.payload).data;
fn airMulWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
_ = f;
_ = inst;
return f.fail("TODO mul with overflow", .{});
}
const lhs = try f.resolveInst(bin_op.lhs);
const rhs = try f.resolveInst(bin_op.rhs);
fn airShlWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
_ = f;
_ = inst;
return f.fail("TODO shl with overflow", .{});
const inst_ty = f.air.typeOfIndex(inst);
const scalar_ty = f.air.typeOf(bin_op.lhs).scalarType();
const target = f.object.dg.module.getTarget();
const int_info = scalar_ty.intInfo(target);
const w = f.object.writer();
const c_bits = toCIntBits(int_info.bits) orelse
return f.fail("TODO: C backend: implement integer arithmetic larger than 128 bits", .{});
var max_buf: [80]u8 = undefined;
const max = intMax(scalar_ty, target, &max_buf);
const ret = try f.allocLocal(inst_ty, .Mut);
try w.writeAll(";");
try f.object.indent_writer.insertNewline();
try f.writeCValue(w, ret);
switch (int_info.signedness) {
.unsigned => {
try w.print(".field_1 = zig_{s}u{d}(", .{
op_abbrev, c_bits,
});
try f.writeCValue(w, lhs);
try w.writeAll(", ");
try f.writeCValue(w, rhs);
try w.writeAll(", &");
try f.writeCValue(w, ret);
try w.print(".field_0, {s}", .{max});
},
.signed => {
var min_buf: [80]u8 = undefined;
const min = intMin(scalar_ty, target, &min_buf);
try w.print(".field_1 = zig_{s}i{d}(", .{
op_abbrev, c_bits,
});
try f.writeCValue(w, lhs);
try w.writeAll(", ");
try f.writeCValue(w, rhs);
try w.writeAll(", &");
try f.writeCValue(w, ret);
try w.print(".field_0, {s}, {s}", .{ min, max });
},
}
try w.writeAll(");");
try f.object.indent_writer.insertNewline();
return ret;
}
fn airNot(f: *Function, inst: Air.Inst.Index) !CValue {
@ -3571,11 +3565,7 @@ fn airBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u8) !C
return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{});
try writer.print(" = zig_{s}_", .{fn_name});
const prefix_byte: u8 = switch (int_info.signedness) {
.signed => 'i',
.unsigned => 'u',
};
try writer.print("{c}{d}(", .{ prefix_byte, c_bits });
try writer.print("{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits });
try f.writeCValue(writer, try f.resolveInst(operand));
try writer.print(", {d});\n", .{int_info.bits});
return local;
@ -3596,11 +3586,7 @@ fn airBinOpBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u
const int_info = lhs_ty.intInfo(target);
const c_bits = toCIntBits(int_info.bits) orelse
return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{});
const prefix_byte: u8 = switch (int_info.signedness) {
.signed => 'i',
.unsigned => 'u',
};
try writer.print(" = zig_{s}_{c}{d}", .{ fn_name, prefix_byte, c_bits });
try writer.print(" = zig_{s}_{c}{d}", .{ fn_name, signAbbrev(int_info.signedness), c_bits });
} else if (lhs_ty.isRuntimeFloat()) {
const c_bits = lhs_ty.floatBits(target);
try writer.print(" = zig_{s}_f{d}", .{ fn_name, c_bits });
@ -4085,3 +4071,53 @@ fn toCIntBits(zig_bits: u32) ?u32 {
}
return null;
}
/// Single-character sign prefix used when composing the names of the
/// generated C helper functions: 'i' for signed types, 'u' for unsigned.
fn signAbbrev(signedness: std.builtin.Signedness) u8 {
    return if (signedness == .signed) 'i' else 'u';
}
/// Renders the maximum value of the integer type `ty` as C source text.
/// C ABI types map to their <limits.h> macros; all other widths are printed
/// as a decimal literal into `buf`.
fn intMax(ty: Type, target: std.Target, buf: []u8) []const u8 {
    switch (ty.tag()) {
        .c_short => return "SHRT_MAX",
        .c_ushort => return "USHRT_MAX",
        .c_int => return "INT_MAX",
        .c_uint => return "UINT_MAX",
        .c_long => return "LONG_MAX",
        .c_ulong => return "ULONG_MAX",
        .c_longlong => return "LLONG_MAX",
        .c_ulonglong => return "ULLONG_MAX",
        else => {
            const int_info = ty.intInfo(target);
            // Number of magnitude bits; a signed type spends one bit on sign.
            const mag_bits = int_info.bits - @boolToInt(int_info.signedness == .signed);
            // A u128 has 128 magnitude bits; `1 << 128` on a u128 (and
            // @intCast(u7, 128)) would trip safety checks, so handle the
            // full-width case explicitly.
            const val: u128 = if (mag_bits >= 128)
                std.math.maxInt(u128)
            else
                (@as(u128, 1) << @intCast(u7, mag_bits)) - 1;
            // TODO make this integer literal have a suffix if necessary (such as "ull")
            return std.fmt.bufPrint(buf, "{}", .{val}) catch |err| switch (err) {
                // Callers pass buffers sized for any 128-bit decimal literal.
                error.NoSpaceLeft => unreachable,
            };
        },
    }
}
/// Renders the minimum value of the signed integer type `ty` as C source
/// text. C ABI types map to their <limits.h> macros; other widths are
/// printed as a decimal literal into `buf`.
fn intMin(ty: Type, target: std.Target, buf: []u8) []const u8 {
    switch (ty.tag()) {
        .c_short => return "SHRT_MIN",
        .c_int => return "INT_MIN",
        .c_long => return "LONG_MIN",
        .c_longlong => return "LLONG_MIN",
        else => {},
    }
    const int_info = ty.intInfo(target);
    // Unsigned types have no interesting minimum; callers must not pass them.
    assert(int_info.signedness == .signed);
    // -(2^(bits-1)); an i0 has no value bits, so its minimum is 0.
    const min_val: i128 = if (int_info.bits == 0)
        0
    else
        -(@as(i128, 1) << @intCast(u7, int_info.bits - 1));
    return std.fmt.bufPrint(buf, "{d}", .{min_val}) catch |err| switch (err) {
        // Callers pass buffers sized for any 128-bit decimal literal.
        error.NoSpaceLeft => unreachable,
    };
}

View File

@ -5604,14 +5604,25 @@ pub const FuncGen = struct {
const rhs = try self.resolveInst(extra.rhs);
const lhs_ty = self.air.typeOf(extra.lhs);
const scalar_ty = lhs_ty.scalarType();
const dest_ty = self.air.typeOfIndex(inst);
const intrinsic_name = if (lhs_ty.isSignedInt()) signed_intrinsic else unsigned_intrinsic;
const intrinsic_name = if (scalar_ty.isSignedInt()) signed_intrinsic else unsigned_intrinsic;
const llvm_lhs_ty = try self.dg.llvmType(lhs_ty);
const llvm_dest_ty = try self.dg.llvmType(dest_ty);
const tg = self.dg.module.getTarget();
const llvm_fn = self.getIntrinsic(intrinsic_name, &.{llvm_lhs_ty});
const result_struct = self.builder.buildCall(llvm_fn, &[_]*const llvm.Value{ lhs, rhs }, 2, .Fast, .Auto, "");
return result_struct;
const result = self.builder.buildExtractValue(result_struct, 0, "");
const overflow_bit = self.builder.buildExtractValue(result_struct, 1, "");
var ty_buf: Type.Payload.Pointer = undefined;
const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, llvmFieldIndex(dest_ty, 0, tg, &ty_buf).?, "");
return self.builder.buildInsertValue(partial, overflow_bit, llvmFieldIndex(dest_ty, 1, tg, &ty_buf).?, "");
}
fn buildElementwiseCall(
@ -5898,26 +5909,30 @@ pub const FuncGen = struct {
const lhs_ty = self.air.typeOf(extra.lhs);
const rhs_ty = self.air.typeOf(extra.rhs);
const lhs_scalar_ty = lhs_ty.scalarType();
const rhs_scalar_ty = rhs_ty.scalarType();
const dest_ty = self.air.typeOfIndex(inst);
const llvm_dest_ty = try self.dg.llvmType(dest_ty);
const tg = self.dg.module.getTarget();
const casted_rhs = if (rhs_ty.bitSize(tg) < lhs_ty.bitSize(tg))
const casted_rhs = if (rhs_scalar_ty.bitSize(tg) < lhs_scalar_ty.bitSize(tg))
self.builder.buildZExt(rhs, try self.dg.llvmType(lhs_ty), "")
else
rhs;
const result = self.builder.buildShl(lhs, casted_rhs, "");
const reconstructed = if (lhs_ty.isSignedInt())
const reconstructed = if (lhs_scalar_ty.isSignedInt())
self.builder.buildAShr(result, casted_rhs, "")
else
self.builder.buildLShr(result, casted_rhs, "");
const overflow_bit = self.builder.buildICmp(.NE, lhs, reconstructed, "");
const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, 0, "");
return self.builder.buildInsertValue(partial, overflow_bit, 1, "");
var ty_buf: Type.Payload.Pointer = undefined;
const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, llvmFieldIndex(dest_ty, 0, tg, &ty_buf).?, "");
return self.builder.buildInsertValue(partial, overflow_bit, llvmFieldIndex(dest_ty, 1, tg, &ty_buf).?, "");
}
fn airAnd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {

View File

@ -165,8 +165,24 @@
#define int128_t __int128
#define uint128_t unsigned __int128
#define UINT128_MAX ((uint128_t)(0xffffffffffffffffull) | 0xffffffffffffffffull)
ZIG_EXTERN_C void *memcpy (void *ZIG_RESTRICT, const void *ZIG_RESTRICT, size_t);
ZIG_EXTERN_C void *memset (void *, int, size_t);
ZIG_EXTERN_C int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow);
ZIG_EXTERN_C int128_t __addoti4(int128_t lhs, int128_t rhs, int *overflow);
ZIG_EXTERN_C uint64_t __uaddodi4(uint64_t lhs, uint64_t rhs, int *overflow);
ZIG_EXTERN_C uint128_t __uaddoti4(uint128_t lhs, uint128_t rhs, int *overflow);
ZIG_EXTERN_C int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow);
ZIG_EXTERN_C int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow);
ZIG_EXTERN_C int128_t __suboti4(int128_t lhs, int128_t rhs, int *overflow);
ZIG_EXTERN_C uint32_t __usubosi4(uint32_t lhs, uint32_t rhs, int *overflow);
ZIG_EXTERN_C uint64_t __usubodi4(uint64_t lhs, uint64_t rhs, int *overflow);
ZIG_EXTERN_C uint128_t __usuboti4(uint128_t lhs, uint128_t rhs, int *overflow);
ZIG_EXTERN_C int64_t __mulodi4(int64_t lhs, int64_t rhs, int *overflow);
ZIG_EXTERN_C int128_t __muloti4(int128_t lhs, int128_t rhs, int *overflow);
ZIG_EXTERN_C uint64_t __umulodi4(uint64_t lhs, uint64_t rhs, int *overflow);
ZIG_EXTERN_C uint128_t __umuloti4(uint128_t lhs, uint128_t rhs, int *overflow);
static inline uint8_t zig_addw_u8(uint8_t lhs, uint8_t rhs, uint8_t max) {
uint8_t thresh = max - rhs;
@ -396,6 +412,689 @@ static inline long long zig_subw_longlong(long long lhs, long long rhs, long lon
return (long long)(((unsigned long long)lhs) - ((unsigned long long)rhs));
}
// Overflow-detecting add for i8-backed values whose Zig-level range is
// [min, max] (full-width i8 passes INT8_MIN/INT8_MAX; a narrower Zig int
// such as i7 passes its own bounds). Stores the two's-complement-wrapped
// sum in *res and returns true iff the exact sum fell outside [min, max].
static inline bool zig_addo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
    // Full-width fast path: use the checked-arithmetic builtin whose operand
    // width matches int8_t, when one exists on this platform.
#if defined(__GNUC__) && INT8_MAX == INT_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_sadd_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT8_MAX == LONG_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_saddl_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_saddll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    // Portable path: compute in a wider type, then wrap into [min, max].
    int16_t big_result = (int16_t)lhs + (int16_t)rhs;
    // The type's cardinality is (max - min + 1); subtracting only
    // (max - min), as before, left the wrapped result off by one
    // (e.g. 127 + 1 produced -127 instead of -128).
    int16_t range = (int16_t)((int16_t)max - (int16_t)min + 1);
    if (big_result > max) {
        *res = (int8_t)(big_result - range);
        return true;
    }
    if (big_result < min) {
        *res = (int8_t)(big_result + range);
        return true;
    }
    *res = (int8_t)big_result;
    return false;
}

// Same contract as zig_addo_i8, for i16-backed values.
static inline bool zig_addo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
#if defined(__GNUC__) && INT16_MAX == INT_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_sadd_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT16_MAX == LONG_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_saddl_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_saddll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    int32_t big_result = (int32_t)lhs + (int32_t)rhs;
    // Wrap modulo the cardinality (max - min + 1), not (max - min).
    int32_t range = (int32_t)max - (int32_t)min + 1;
    if (big_result > max) {
        *res = (int16_t)(big_result - range);
        return true;
    }
    if (big_result < min) {
        *res = (int16_t)(big_result + range);
        return true;
    }
    *res = (int16_t)big_result;
    return false;
}

// Same contract as zig_addo_i8, for i32-backed values.
static inline bool zig_addo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
#if defined(__GNUC__) && INT32_MAX == INT_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_sadd_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT32_MAX == LONG_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_saddl_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_saddll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    int64_t big_result = (int64_t)lhs + (int64_t)rhs;
    // Wrap modulo the cardinality (max - min + 1), not (max - min).
    int64_t range = (int64_t)max - (int64_t)min + 1;
    if (big_result > max) {
        *res = (int32_t)(big_result - range);
        return true;
    }
    if (big_result < min) {
        *res = (int32_t)(big_result + range);
        return true;
    }
    *res = (int32_t)big_result;
    return false;
}
// Overflow-detecting add for i64-backed values whose Zig-level range is
// [min, max]. Stores the machine-wrapped sum in *res and returns true iff
// the exact sum fell outside [min, max].
static inline bool zig_addo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
    bool overflow;
#if defined(__GNUC__) && INT64_MAX == INT_MAX
    overflow = __builtin_sadd_overflow(lhs, rhs, (int*)res);
#elif defined(__GNUC__) && INT64_MAX == LONG_MAX
    overflow = __builtin_saddl_overflow(lhs, rhs, (long*)res);
#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX
    overflow = __builtin_saddll_overflow(lhs, rhs, (long long*)res);
#else
    // Non-GNUC fallback: compiler-rt's checked add.
    int int_overflow;
    *res = __addodi4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // For a sub-64-bit Zig type (e.g. i48) the native add cannot carry, but
    // the result may still exceed the type's bounds.
    // NOTE(review): in that case *res is reported as overflowed but is NOT
    // wrapped back into [min, max] — the TODOs below acknowledge this.
    if (!overflow) {
        if (*res > max) {
            // TODO adjust the result to be the truncated bits
            return true;
        } else if (*res < min) {
            // TODO adjust the result to be the truncated bits
            return true;
        }
    }
    return overflow;
}

// Same contract as zig_addo_i64, for 128-bit values.
// NOTE(review): INT128_MAX is not defined in this header (only UINT128_MAX
// is); an undefined identifier evaluates to 0 inside #if, so the builtin
// branches below never match and the __addoti4 fallback is always compiled —
// confirm this is intended.
static inline bool zig_addo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
    bool overflow;
#if defined(__GNUC__) && INT128_MAX == INT_MAX
    overflow = __builtin_sadd_overflow(lhs, rhs, (int*)res);
#elif defined(__GNUC__) && INT128_MAX == LONG_MAX
    overflow = __builtin_saddl_overflow(lhs, rhs, (long*)res);
#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX
    overflow = __builtin_saddll_overflow(lhs, rhs, (long long*)res);
#else
    int int_overflow;
    *res = __addoti4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // Sub-128-bit bounds check; result is not wrapped on this path (see TODOs).
    if (!overflow) {
        if (*res > max) {
            // TODO adjust the result to be the truncated bits
            return true;
        } else if (*res < min) {
            // TODO adjust the result to be the truncated bits
            return true;
        }
    }
    return overflow;
}
// Overflow-detecting add for u8-backed values whose Zig-level range is
// [0, max] (e.g. a u3 passes max = 7). Stores the wrapped sum in *res and
// returns true iff the exact sum exceeded max.
static inline bool zig_addo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
    // Full-width fast path via the checked-arithmetic builtin whose operand
    // width matches uint8_t. On common targets UINT8_MAX != UINT_MAX, so the
    // portable path below is the one actually compiled.
#if defined(__GNUC__) && UINT8_MAX == UINT_MAX
    if (max == UINT8_MAX) {
        return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX
    if (max == UINT8_MAX) {
        return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX
    if (max == UINT8_MAX) {
        return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    // Portable path: compute in a wider type, then wrap modulo (max + 1).
    uint16_t big_result = (uint16_t)lhs + (uint16_t)rhs;
    if (big_result > max) {
        // The sum is at most 2*max, so one subtraction of (max + 1) wraps it.
        *res = big_result - max - 1;
        return true;
    }
    *res = big_result;
    return false;
}
// Overflow-detecting add for u16-backed values whose Zig-level range is
// [0, max]. Stores the wrapped sum in *res and returns true on overflow.
// (Return type fixed to bool for consistency with the other zig_*o_*
// helpers; it previously returned uint16_t.)
static inline bool zig_addo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
#if defined(__GNUC__) && UINT16_MAX == UINT_MAX
    if (max == UINT16_MAX) {
        return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX
    if (max == UINT16_MAX) {
        return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX
    if (max == UINT16_MAX) {
        return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    // Portable path: the sum is at most 2*max, so one subtraction of
    // (max + 1) wraps it into range.
    uint32_t big_result = (uint32_t)lhs + (uint32_t)rhs;
    if (big_result > max) {
        *res = (uint16_t)(big_result - max - 1);
        return true;
    }
    *res = (uint16_t)big_result;
    return false;
}

// Same contract as zig_addo_u16, for u32-backed values. (Return type fixed
// to bool; it previously returned uint32_t.)
static inline bool zig_addo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
#if defined(__GNUC__) && UINT32_MAX == UINT_MAX
    if (max == UINT32_MAX) {
        return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX
    if (max == UINT32_MAX) {
        return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX
    if (max == UINT32_MAX) {
        return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    uint64_t big_result = (uint64_t)lhs + (uint64_t)rhs;
    if (big_result > max) {
        *res = (uint32_t)(big_result - max - 1);
        return true;
    }
    *res = (uint32_t)big_result;
    return false;
}

// Same contract as zig_addo_u16, for u64-backed values. (Return type fixed
// to bool; it previously returned uint64_t.)
static inline bool zig_addo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
    bool overflow;
#if defined(__GNUC__) && UINT64_MAX == UINT_MAX
    overflow = __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res);
#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX
    overflow = __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res);
#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX
    overflow = __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res);
#else
    int int_overflow;
    *res = __uaddodi4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // Sub-64-bit Zig type: the native add cannot carry, but the result may
    // still exceed max; wrap by subtracting the cardinality (max + 1).
    // (The previous code subtracted (max - 1), which was off by two. When
    // max == UINT64_MAX the condition is false, so max + 1 never executes.)
    if (*res > max && !overflow) {
        *res -= max + 1;
        return true;
    }
    return overflow;
}
// Overflow-detecting add for u128-backed values whose Zig-level range is
// [0, max]. Stores the wrapped sum in *res via compiler-rt's __uaddoti4 and
// returns true on overflow. (Return type fixed to bool — it previously
// returned uint128_t — and the sub-width wrap now subtracts the cardinality
// (max + 1) instead of (max - 1), which was off by two. When
// max == UINT128_MAX the condition is false, so max + 1 never executes.)
static inline bool zig_addo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
    int overflow;
    *res = __uaddoti4(lhs, rhs, &overflow);
    if (*res > max && overflow == 0) {
        *res -= max + 1;
        return true;
    }
    return overflow != 0;
}
// Overflow-detecting subtract for i8-backed values whose Zig-level range is
// [min, max]. Stores the two's-complement-wrapped difference in *res and
// returns true iff the exact difference fell outside [min, max].
static inline bool zig_subo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
    // Full-width fast path via the matching checked-arithmetic builtin.
#if defined(__GNUC__) && INT8_MAX == INT_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_ssub_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT8_MAX == LONG_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_ssubl_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_ssubll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    // Portable path: compute in a wider type, then wrap into [min, max].
    int16_t big_result = (int16_t)lhs - (int16_t)rhs;
    // The type's cardinality is (max - min + 1); subtracting only
    // (max - min), as before, left the wrapped result off by one
    // (e.g. -128 - 1 produced 126 instead of 127).
    int16_t range = (int16_t)((int16_t)max - (int16_t)min + 1);
    if (big_result > max) {
        *res = (int8_t)(big_result - range);
        return true;
    }
    if (big_result < min) {
        *res = (int8_t)(big_result + range);
        return true;
    }
    *res = (int8_t)big_result;
    return false;
}

// Same contract as zig_subo_i8, for i16-backed values.
static inline bool zig_subo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
#if defined(__GNUC__) && INT16_MAX == INT_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_ssub_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT16_MAX == LONG_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_ssubl_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_ssubll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    int32_t big_result = (int32_t)lhs - (int32_t)rhs;
    // Wrap modulo the cardinality (max - min + 1), not (max - min).
    int32_t range = (int32_t)max - (int32_t)min + 1;
    if (big_result > max) {
        *res = (int16_t)(big_result - range);
        return true;
    }
    if (big_result < min) {
        *res = (int16_t)(big_result + range);
        return true;
    }
    *res = (int16_t)big_result;
    return false;
}

// Same contract as zig_subo_i8, for i32-backed values.
static inline bool zig_subo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
#if defined(__GNUC__) && INT32_MAX == INT_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_ssub_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT32_MAX == LONG_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_ssubl_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_ssubll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    int64_t big_result = (int64_t)lhs - (int64_t)rhs;
    // Wrap modulo the cardinality (max - min + 1), not (max - min).
    int64_t range = (int64_t)max - (int64_t)min + 1;
    if (big_result > max) {
        *res = (int32_t)(big_result - range);
        return true;
    }
    if (big_result < min) {
        *res = (int32_t)(big_result + range);
        return true;
    }
    *res = (int32_t)big_result;
    return false;
}
// Overflow-detecting subtract for i64-backed values whose Zig-level range is
// [min, max]. Stores the machine-wrapped difference in *res and returns true
// iff the exact difference fell outside [min, max].
static inline bool zig_subo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
    bool overflow;
#if defined(__GNUC__) && INT64_MAX == INT_MAX
    overflow = __builtin_ssub_overflow(lhs, rhs, (int*)res);
#elif defined(__GNUC__) && INT64_MAX == LONG_MAX
    overflow = __builtin_ssubl_overflow(lhs, rhs, (long*)res);
#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX
    overflow = __builtin_ssubll_overflow(lhs, rhs, (long long*)res);
#else
    // Non-GNUC fallback: compiler-rt's checked subtract.
    int int_overflow;
    *res = __subodi4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // For a sub-64-bit Zig type the native subtract cannot wrap, but the
    // result may still leave the type's bounds.
    // NOTE(review): *res is reported as overflowed but NOT wrapped back into
    // [min, max] on this path — the TODOs below acknowledge this.
    if (!overflow) {
        if (*res > max) {
            // TODO adjust the result to be the truncated bits
            return true;
        } else if (*res < min) {
            // TODO adjust the result to be the truncated bits
            return true;
        }
    }
    return overflow;
}

// Same contract as zig_subo_i64, for 128-bit values.
// NOTE(review): INT128_MAX is not defined in this header (only UINT128_MAX
// is), so inside #if it evaluates as 0 and the __suboti4 fallback is always
// compiled — confirm this is intended.
static inline bool zig_subo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
    bool overflow;
#if defined(__GNUC__) && INT128_MAX == INT_MAX
    overflow = __builtin_ssub_overflow(lhs, rhs, (int*)res);
#elif defined(__GNUC__) && INT128_MAX == LONG_MAX
    overflow = __builtin_ssubl_overflow(lhs, rhs, (long*)res);
#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX
    overflow = __builtin_ssubll_overflow(lhs, rhs, (long long*)res);
#else
    int int_overflow;
    *res = __suboti4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // Sub-128-bit bounds check; result is not wrapped on this path (see TODOs).
    if (!overflow) {
        if (*res > max) {
            // TODO adjust the result to be the truncated bits
            return true;
        } else if (*res < min) {
            // TODO adjust the result to be the truncated bits
            return true;
        }
    }
    return overflow;
}
// Overflow-detecting subtract for u8-backed values whose Zig-level range is
// [0, max]. Stores the wrapped difference in *res and returns true iff
// rhs > lhs (the only way unsigned subtraction can leave [0, max]).
static inline bool zig_subo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
    // The builtins wrap modulo 2^8, which is only correct for the full-width
    // type, so — unlike the previous version, which called them
    // unconditionally — they are now gated on max == UINT8_MAX, matching
    // zig_addo_u8.
#if defined(__GNUC__) && UINT8_MAX == UINT_MAX
    if (max == UINT8_MAX) {
        return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX
    if (max == UINT8_MAX) {
        return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX
    if (max == UINT8_MAX) {
        return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    // Portable path: wrap modulo (max + 1).
    if (rhs > lhs) {
        *res = max - (rhs - lhs - 1);
        return true;
    }
    *res = lhs - rhs;
    return false;
}

// Same contract as zig_subo_u8, for u16-backed values. (Return type fixed to
// bool — it previously returned uint16_t — and the builtins are now gated on
// max == UINT16_MAX like the add helpers.)
static inline bool zig_subo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
#if defined(__GNUC__) && UINT16_MAX == UINT_MAX
    if (max == UINT16_MAX) {
        return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX
    if (max == UINT16_MAX) {
        return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX
    if (max == UINT16_MAX) {
        return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    if (rhs > lhs) {
        *res = max - (rhs - lhs - 1);
        return true;
    }
    *res = lhs - rhs;
    return false;
}
// Overflow-detecting subtract for u32-backed values whose Zig-level range is
// [0, max]. Stores the wrapped difference in *res and returns true on
// underflow. (Return type fixed to bool — it previously returned uint32_t —
// and the compiler-rt fallback is now in an #else instead of being compiled
// unconditionally as dead code after the builtin's return.)
static inline bool zig_subo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
    if (max == UINT32_MAX) {
#if defined(__GNUC__) && UINT32_MAX == UINT_MAX
        return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res);
#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX
        return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res);
#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX
        return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res);
#else
        int int_overflow;
        *res = __usubosi4(lhs, rhs, &int_overflow);
        return int_overflow != 0;
#endif
    } else {
        // Sub-32-bit Zig type: wrap modulo (max + 1).
        if (rhs > lhs) {
            *res = max - (rhs - lhs - 1);
            return true;
        }
        *res = lhs - rhs;
        return false;
    }
}

// Same contract as zig_subo_u32, for u64-backed values. (Return type fixed
// to bool; it previously returned uint64_t.)
static inline bool zig_subo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
    if (max == UINT64_MAX) {
#if defined(__GNUC__) && UINT64_MAX == UINT_MAX
        return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res);
#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX
        return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res);
#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX
        return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res);
#else
        int int_overflow;
        *res = __usubodi4(lhs, rhs, &int_overflow);
        return int_overflow != 0;
#endif
    } else {
        // Sub-64-bit Zig type: wrap modulo (max + 1).
        if (rhs > lhs) {
            *res = max - (rhs - lhs - 1);
            return true;
        }
        *res = lhs - rhs;
        return false;
    }
}
// Overflow-detecting subtract for u128-backed values whose Zig-level range
// is [0, max]. Full-width values go through compiler-rt's __usuboti4;
// narrower types wrap modulo (max + 1). Returns true on underflow.
// (Return type fixed to bool; it previously returned uint128_t.)
static inline bool zig_subo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
    if (max == UINT128_MAX) {
        int int_overflow;
        *res = __usuboti4(lhs, rhs, &int_overflow);
        return int_overflow != 0;
    } else {
        if (rhs > lhs) {
            *res = max - (rhs - lhs - 1);
            return true;
        }
        *res = lhs - rhs;
        return false;
    }
}
// Overflow-detecting multiply for i8-backed values whose Zig-level range is
// [min, max]. Stores the two's-complement-wrapped product in *res and
// returns true iff the exact product fell outside [min, max].
static inline bool zig_mulo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
    // Full-width fast path via the matching checked-arithmetic builtin.
#if defined(__GNUC__) && INT8_MAX == INT_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_smul_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT8_MAX == LONG_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_smull_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX
    if (min == INT8_MIN && max == INT8_MAX) {
        return __builtin_smulll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    int16_t big_result = (int16_t)lhs * (int16_t)rhs;
    if (big_result > max || big_result < min) {
        // A product can overshoot the range by many multiples of its
        // cardinality, so reduce modulo (max - min + 1); a single
        // subtraction (the previous code) was wrong for most overflows.
        int16_t range = (int16_t)((int16_t)max - (int16_t)min + 1);
        int16_t offset = (int16_t)((big_result - (int16_t)min) % range);
        if (offset < 0) offset = (int16_t)(offset + range);
        *res = (int8_t)(offset + (int16_t)min);
        return true;
    }
    *res = (int8_t)big_result;
    return false;
}

// Same contract as zig_mulo_i8, for i16-backed values.
static inline bool zig_mulo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
#if defined(__GNUC__) && INT16_MAX == INT_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_smul_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT16_MAX == LONG_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_smull_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX
    if (min == INT16_MIN && max == INT16_MAX) {
        return __builtin_smulll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    int32_t big_result = (int32_t)lhs * (int32_t)rhs;
    if (big_result > max || big_result < min) {
        // Reduce modulo the cardinality; see zig_mulo_i8.
        int32_t range = (int32_t)max - (int32_t)min + 1;
        int32_t offset = (big_result - (int32_t)min) % range;
        if (offset < 0) offset += range;
        *res = (int16_t)(offset + (int32_t)min);
        return true;
    }
    *res = (int16_t)big_result;
    return false;
}

// Same contract as zig_mulo_i8, for i32-backed values.
static inline bool zig_mulo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
#if defined(__GNUC__) && INT32_MAX == INT_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_smul_overflow(lhs, rhs, (int*)res);
    }
#elif defined(__GNUC__) && INT32_MAX == LONG_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_smull_overflow(lhs, rhs, (long*)res);
    }
#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX
    if (min == INT32_MIN && max == INT32_MAX) {
        return __builtin_smulll_overflow(lhs, rhs, (long long*)res);
    }
#endif
    int64_t big_result = (int64_t)lhs * (int64_t)rhs;
    if (big_result > max || big_result < min) {
        // Reduce modulo the cardinality; see zig_mulo_i8.
        int64_t range = (int64_t)max - (int64_t)min + 1;
        int64_t offset = (big_result - (int64_t)min) % range;
        if (offset < 0) offset += range;
        *res = (int32_t)(offset + (int64_t)min);
        return true;
    }
    *res = (int32_t)big_result;
    return false;
}
// Overflow-detecting multiply for i64-backed values whose Zig-level range is
// [min, max]. Stores the machine-wrapped product in *res and returns true
// iff the exact product fell outside [min, max].
static inline bool zig_mulo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
    bool overflow;
#if defined(__GNUC__) && INT64_MAX == INT_MAX
    overflow = __builtin_smul_overflow(lhs, rhs, (int*)res);
#elif defined(__GNUC__) && INT64_MAX == LONG_MAX
    overflow = __builtin_smull_overflow(lhs, rhs, (long*)res);
#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX
    overflow = __builtin_smulll_overflow(lhs, rhs, (long long*)res);
#else
    // Non-GNUC fallback: compiler-rt's checked multiply.
    int int_overflow;
    *res = __mulodi4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // Sub-64-bit Zig type: the product may be representable in 64 bits yet
    // still exceed the type's bounds.
    // NOTE(review): *res is reported as overflowed but NOT wrapped back into
    // [min, max] on this path — the TODOs below acknowledge this.
    if (!overflow) {
        if (*res > max) {
            // TODO adjust the result to be the truncated bits
            return true;
        } else if (*res < min) {
            // TODO adjust the result to be the truncated bits
            return true;
        }
    }
    return overflow;
}

// Same contract as zig_mulo_i64, for 128-bit values.
// NOTE(review): INT128_MAX is not defined in this header (only UINT128_MAX
// is), so inside #if it evaluates as 0 and the __muloti4 fallback is always
// compiled — confirm this is intended.
static inline bool zig_mulo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
    bool overflow;
#if defined(__GNUC__) && INT128_MAX == INT_MAX
    overflow = __builtin_smul_overflow(lhs, rhs, (int*)res);
#elif defined(__GNUC__) && INT128_MAX == LONG_MAX
    overflow = __builtin_smull_overflow(lhs, rhs, (long*)res);
#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX
    overflow = __builtin_smulll_overflow(lhs, rhs, (long long*)res);
#else
    int int_overflow;
    *res = __muloti4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // Sub-128-bit bounds check; result is not wrapped on this path (see TODOs).
    if (!overflow) {
        if (*res > max) {
            // TODO adjust the result to be the truncated bits
            return true;
        } else if (*res < min) {
            // TODO adjust the result to be the truncated bits
            return true;
        }
    }
    return overflow;
}
// Overflow-detecting multiply for u8-backed values whose Zig-level range is
// [0, max]. Stores the wrapped product in *res and returns true iff the
// exact product exceeded max.
static inline bool zig_mulo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
    // Full-width fast path via the matching checked-arithmetic builtin.
#if defined(__GNUC__) && UINT8_MAX == UINT_MAX
    if (max == UINT8_MAX) {
        return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX
    if (max == UINT8_MAX) {
        return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX
    if (max == UINT8_MAX) {
        return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    uint16_t big_result = (uint16_t)lhs * (uint16_t)rhs;
    if (big_result > max) {
        // A product can exceed the range by more than one multiple of its
        // cardinality, so reduce modulo (max + 1); a single subtraction
        // (the previous code) was wrong for most overflows.
        *res = (uint8_t)(big_result % ((uint16_t)max + 1));
        return true;
    }
    *res = (uint8_t)big_result;
    return false;
}

// Same contract as zig_mulo_u8, for u16-backed values. (Return type fixed
// to bool; it previously returned uint16_t.)
static inline bool zig_mulo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
#if defined(__GNUC__) && UINT16_MAX == UINT_MAX
    if (max == UINT16_MAX) {
        return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX
    if (max == UINT16_MAX) {
        return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX
    if (max == UINT16_MAX) {
        return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    uint32_t big_result = (uint32_t)lhs * (uint32_t)rhs;
    if (big_result > max) {
        // Reduce modulo (max + 1); see zig_mulo_u8.
        *res = (uint16_t)(big_result % ((uint32_t)max + 1));
        return true;
    }
    *res = (uint16_t)big_result;
    return false;
}

// Same contract as zig_mulo_u8, for u32-backed values. (Return type fixed
// to bool; it previously returned uint32_t.)
static inline bool zig_mulo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
#if defined(__GNUC__) && UINT32_MAX == UINT_MAX
    if (max == UINT32_MAX) {
        return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
    }
#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX
    if (max == UINT32_MAX) {
        return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
    }
#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX
    if (max == UINT32_MAX) {
        return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
    }
#endif
    uint64_t big_result = (uint64_t)lhs * (uint64_t)rhs;
    if (big_result > max) {
        // Reduce modulo (max + 1); see zig_mulo_u8.
        *res = (uint32_t)(big_result % ((uint64_t)max + 1));
        return true;
    }
    *res = (uint32_t)big_result;
    return false;
}
// Overflow-detecting multiply for u64-backed values whose Zig-level range is
// [0, max]. Stores the wrapped product in *res and returns true on overflow.
// (Return type fixed to bool — it previously returned uint64_t — and the
// sub-width wrap now reduces modulo (max + 1) instead of subtracting
// (max - 1) once, which was wrong for most overflowing products.)
static inline bool zig_mulo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
    bool overflow;
#if defined(__GNUC__) && UINT64_MAX == UINT_MAX
    overflow = __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX
    overflow = __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX
    overflow = __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
#else
    int int_overflow;
    *res = __umulodi4(lhs, rhs, &int_overflow);
    overflow = int_overflow != 0;
#endif
    // Sub-64-bit Zig type: a 64-bit-representable product can still exceed
    // max. When max == UINT64_MAX the condition is false, so (max + 1)
    // never executes.
    if (*res > max && !overflow) {
        *res %= max + 1;
        return true;
    }
    return overflow;
}
// Overflow-detecting multiply for u128-backed values whose Zig-level range
// is [0, max], via compiler-rt's __umuloti4. Returns true on overflow.
// (Return type fixed to bool — it previously returned uint128_t — and the
// sub-width wrap now reduces modulo (max + 1) instead of subtracting
// (max - 1) once, which was wrong for most overflowing products. When
// max == UINT128_MAX the condition is false, so (max + 1) never executes.)
static inline bool zig_mulo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
    int overflow;
    *res = __umuloti4(lhs, rhs, &overflow);
    if (*res > max && overflow == 0) {
        *res %= max + 1;
        return true;
    }
    return overflow != 0;
}
static inline float zig_bitcast_f32_u32(uint32_t arg) {
float dest;
memcpy(&dest, &arg, sizeof dest);
@ -608,6 +1307,76 @@ static inline int zig_popcount_u128(uint128_t value, uint8_t zig_type_bit_width)
#define zig_popcount_i128 zig_popcount_u128
// Left-shift-with-overflow helpers. Each stores (lhs << rhs) in *res and
// returns true iff the shift pushed significant bits out of the `bits`-wide
// Zig type (detected via the zig_clz_* helpers defined elsewhere in this
// header). On overflow the result is masked down to the low `bits` bits.
// NOTE(review): for the signed variants, left-shifting a negative lhs is
// undefined behavior in ISO C — this presumably relies on GCC/Clang's
// two's-complement semantics; confirm.
// NOTE(review): the overflow mask does not sign-extend the truncated signed
// result, and the behavior of zig_clz_i* for negative lhs is not visible
// here — confirm against the clz definitions.
static inline bool zig_shlo_i8(int8_t lhs, int8_t rhs, int8_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_i8(lhs, bits) >= rhs) return false;
    *res &= UINT8_MAX >> (8 - bits);
    return true;
}

// Same pattern as zig_shlo_i8, for i16-backed values.
static inline bool zig_shlo_i16(int16_t lhs, int16_t rhs, int16_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_i16(lhs, bits) >= rhs) return false;
    *res &= UINT16_MAX >> (16 - bits);
    return true;
}

// Same pattern as zig_shlo_i8, for i32-backed values.
static inline bool zig_shlo_i32(int32_t lhs, int32_t rhs, int32_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_i32(lhs, bits) >= rhs) return false;
    *res &= UINT32_MAX >> (32 - bits);
    return true;
}

// Same pattern as zig_shlo_i8, for i64-backed values.
static inline bool zig_shlo_i64(int64_t lhs, int64_t rhs, int64_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_i64(lhs, bits) >= rhs) return false;
    *res &= UINT64_MAX >> (64 - bits);
    return true;
}

// Same pattern as zig_shlo_i8, for 128-bit values.
static inline bool zig_shlo_i128(int128_t lhs, int128_t rhs, int128_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_i128(lhs, bits) >= rhs) return false;
    *res &= UINT128_MAX >> (128 - bits);
    return true;
}

// Unsigned variant: overflow iff a set bit is shifted past the `bits`-wide
// range; on overflow the result is wrapped by masking to the low bits.
static inline bool zig_shlo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_u8(lhs, bits) >= rhs) return false;
    *res &= UINT8_MAX >> (8 - bits);
    return true;
}
// Left-shift-with-overflow for u16-backed values: stores (lhs << rhs) in
// *res and returns true iff a set bit was shifted out of the `bits`-wide
// range (detected via zig_clz_u16, defined elsewhere in this header); on
// overflow the result is masked to the low `bits` bits. (Return type fixed
// to bool for consistency with zig_shlo_u8 and the other overflow helpers;
// it previously returned uint16_t.)
static inline bool zig_shlo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_u16(lhs, bits) >= rhs) return false;
    *res &= UINT16_MAX >> (16 - bits);
    return true;
}

// Same contract as zig_shlo_u16, for u32-backed values. (Return type fixed
// to bool; it previously returned uint32_t.)
static inline bool zig_shlo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_u32(lhs, bits) >= rhs) return false;
    *res &= UINT32_MAX >> (32 - bits);
    return true;
}

// Same contract as zig_shlo_u16, for u64-backed values. (Return type fixed
// to bool; it previously returned uint64_t.)
static inline bool zig_shlo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_u64(lhs, bits) >= rhs) return false;
    *res &= UINT64_MAX >> (64 - bits);
    return true;
}

// Same contract as zig_shlo_u16, for 128-bit values. (Return type fixed to
// bool; it previously returned uint128_t.)
static inline bool zig_shlo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint8_t bits) {
    *res = lhs << rhs;
    if (zig_clz_u128(lhs, bits) >= rhs) return false;
    *res &= UINT128_MAX >> (128 - bits);
    return true;
}
#define zig_sign_extend(T) \
static inline T zig_sign_extend_##T(T value, uint8_t zig_type_bit_width) { \
const T m = (T)1 << (T)(zig_type_bit_width - 1); \

View File

@ -5999,6 +5999,7 @@ pub const Type = extern union {
};
};
pub const @"u1" = initTag(.u1);
pub const @"u8" = initTag(.u8);
pub const @"u16" = initTag(.u16);
pub const @"u32" = initTag(.u32);

View File

@ -1671,6 +1671,7 @@ pub const Value = extern union {
}
/// Asserts the value is an integer, and the destination type is ComptimeInt or Int.
/// Vectors are also accepted. Vector results are reduced with AND.
pub fn intFitsInType(self: Value, ty: Type, target: Target) bool {
switch (self.tag()) {
.zero,
@ -1767,6 +1768,16 @@ pub const Value = extern union {
else => unreachable,
},
.aggregate => {
assert(ty.zigTypeTag() == .Vector);
for (self.castTag(.aggregate).?.data) |elem| {
if (!elem.intFitsInType(ty.scalarType(), target)) {
return false;
}
}
return true;
},
else => unreachable,
}
}
@ -2015,7 +2026,7 @@ pub const Value = extern union {
const result_data = try allocator.alloc(Value, ty.vectorLen());
for (result_data) |*scalar, i| {
const res_bool = compareScalar(lhs.indexVectorlike(i), op, rhs.indexVectorlike(i), ty.scalarType(), mod);
scalar.* = if (res_bool) Value.@"true" else Value.@"false";
scalar.* = makeBool(res_bool);
}
return Value.Tag.aggregate.create(allocator, result_data);
}
@ -2950,7 +2961,8 @@ pub const Value = extern union {
}
pub const OverflowArithmeticResult = struct {
overflowed: bool,
/// TODO: Rename to `overflow_bit` and make of type `u1`.
overflowed: Value,
wrapped_result: Value,
};
@ -2960,6 +2972,29 @@ pub const Value = extern union {
ty: Type,
arena: Allocator,
target: Target,
) !OverflowArithmeticResult {
if (ty.zigTypeTag() == .Vector) {
const overflowed_data = try arena.alloc(Value, ty.vectorLen());
const result_data = try arena.alloc(Value, ty.vectorLen());
for (result_data) |*scalar, i| {
const of_math_result = try intAddWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target);
overflowed_data[i] = of_math_result.overflowed;
scalar.* = of_math_result.wrapped_result;
}
return OverflowArithmeticResult{
.overflowed = try Value.Tag.aggregate.create(arena, overflowed_data),
.wrapped_result = try Value.Tag.aggregate.create(arena, result_data),
};
}
return intAddWithOverflowScalar(lhs, rhs, ty, arena, target);
}
pub fn intAddWithOverflowScalar(
lhs: Value,
rhs: Value,
ty: Type,
arena: Allocator,
target: Target,
) !OverflowArithmeticResult {
const info = ty.intInfo(target);
@ -2975,7 +3010,7 @@ pub const Value = extern union {
const overflowed = result_bigint.addWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
const result = try fromBigInt(arena, result_bigint.toConst());
return OverflowArithmeticResult{
.overflowed = overflowed,
.overflowed = makeBool(overflowed),
.wrapped_result = result,
};
}
@ -3086,6 +3121,29 @@ pub const Value = extern union {
ty: Type,
arena: Allocator,
target: Target,
) !OverflowArithmeticResult {
if (ty.zigTypeTag() == .Vector) {
const overflowed_data = try arena.alloc(Value, ty.vectorLen());
const result_data = try arena.alloc(Value, ty.vectorLen());
for (result_data) |*scalar, i| {
const of_math_result = try intSubWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target);
overflowed_data[i] = of_math_result.overflowed;
scalar.* = of_math_result.wrapped_result;
}
return OverflowArithmeticResult{
.overflowed = try Value.Tag.aggregate.create(arena, overflowed_data),
.wrapped_result = try Value.Tag.aggregate.create(arena, result_data),
};
}
return intSubWithOverflowScalar(lhs, rhs, ty, arena, target);
}
pub fn intSubWithOverflowScalar(
lhs: Value,
rhs: Value,
ty: Type,
arena: Allocator,
target: Target,
) !OverflowArithmeticResult {
const info = ty.intInfo(target);
@ -3101,7 +3159,7 @@ pub const Value = extern union {
const overflowed = result_bigint.subWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
const wrapped_result = try fromBigInt(arena, result_bigint.toConst());
return OverflowArithmeticResult{
.overflowed = overflowed,
.overflowed = makeBool(overflowed),
.wrapped_result = wrapped_result,
};
}
@ -3196,6 +3254,29 @@ pub const Value = extern union {
ty: Type,
arena: Allocator,
target: Target,
) !OverflowArithmeticResult {
if (ty.zigTypeTag() == .Vector) {
const overflowed_data = try arena.alloc(Value, ty.vectorLen());
const result_data = try arena.alloc(Value, ty.vectorLen());
for (result_data) |*scalar, i| {
const of_math_result = try intMulWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target);
overflowed_data[i] = of_math_result.overflowed;
scalar.* = of_math_result.wrapped_result;
}
return OverflowArithmeticResult{
.overflowed = try Value.Tag.aggregate.create(arena, overflowed_data),
.wrapped_result = try Value.Tag.aggregate.create(arena, result_data),
};
}
return intMulWithOverflowScalar(lhs, rhs, ty, arena, target);
}
pub fn intMulWithOverflowScalar(
lhs: Value,
rhs: Value,
ty: Type,
arena: Allocator,
target: Target,
) !OverflowArithmeticResult {
const info = ty.intInfo(target);
@ -3220,7 +3301,7 @@ pub const Value = extern union {
}
return OverflowArithmeticResult{
.overflowed = overflowed,
.overflowed = makeBool(overflowed),
.wrapped_result = try fromBigInt(arena, result_bigint.toConst()),
};
}
@ -3910,6 +3991,29 @@ pub const Value = extern union {
ty: Type,
allocator: Allocator,
target: Target,
) !OverflowArithmeticResult {
if (ty.zigTypeTag() == .Vector) {
const overflowed_data = try allocator.alloc(Value, ty.vectorLen());
const result_data = try allocator.alloc(Value, ty.vectorLen());
for (result_data) |*scalar, i| {
const of_math_result = try shlWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), allocator, target);
overflowed_data[i] = of_math_result.overflowed;
scalar.* = of_math_result.wrapped_result;
}
return OverflowArithmeticResult{
.overflowed = try Value.Tag.aggregate.create(allocator, overflowed_data),
.wrapped_result = try Value.Tag.aggregate.create(allocator, result_data),
};
}
return shlWithOverflowScalar(lhs, rhs, ty, allocator, target);
}
pub fn shlWithOverflowScalar(
lhs: Value,
rhs: Value,
ty: Type,
allocator: Allocator,
target: Target,
) !OverflowArithmeticResult {
const info = ty.intInfo(target);
var lhs_space: Value.BigIntSpace = undefined;
@ -3930,7 +4034,7 @@ pub const Value = extern union {
result_bigint.truncate(result_bigint.toConst(), info.signedness, info.bits);
}
return OverflowArithmeticResult{
.overflowed = overflowed,
.overflowed = makeBool(overflowed),
.wrapped_result = try fromBigInt(allocator, result_bigint.toConst()),
};
}

View File

@ -621,24 +621,41 @@ test "128-bit multiplication" {
test "@addWithOverflow" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
var result: u8 = undefined;
try expect(@addWithOverflow(u8, 250, 100, &result));
try expect(result == 94);
try expect(!@addWithOverflow(u8, 100, 150, &result));
try expect(result == 250);
{
var result: u8 = undefined;
try expect(@addWithOverflow(u8, 250, 100, &result));
try expect(result == 94);
try expect(!@addWithOverflow(u8, 100, 150, &result));
try expect(result == 250);
var a: u8 = 200;
var b: u8 = 99;
try expect(@addWithOverflow(u8, a, b, &result));
try expect(result == 43);
b = 55;
try expect(!@addWithOverflow(u8, a, b, &result));
try expect(result == 255);
var a: u8 = 200;
var b: u8 = 99;
try expect(@addWithOverflow(u8, a, b, &result));
try expect(result == 43);
b = 55;
try expect(!@addWithOverflow(u8, a, b, &result));
try expect(result == 255);
}
{
var a: usize = 6;
var b: usize = 6;
var res: usize = undefined;
try expect(!@addWithOverflow(usize, a, b, &res));
try expect(res == 12);
}
{
var a: isize = -6;
var b: isize = -6;
var res: isize = undefined;
try expect(!@addWithOverflow(isize, a, b, &res));
try expect(res == -12);
}
}
test "small int addition" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
var x: u2 = 0;
@ -886,19 +903,37 @@ test "@mulWithOverflow bitsize > 32" {
test "@subWithOverflow" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
var result: u8 = undefined;
try expect(@subWithOverflow(u8, 1, 2, &result));
try expect(result == 255);
try expect(!@subWithOverflow(u8, 1, 1, &result));
try expect(result == 0);
{
var result: u8 = undefined;
try expect(@subWithOverflow(u8, 1, 2, &result));
try expect(result == 255);
try expect(!@subWithOverflow(u8, 1, 1, &result));
try expect(result == 0);
var a: u8 = 1;
var b: u8 = 2;
try expect(@subWithOverflow(u8, a, b, &result));
try expect(result == 255);
b = 1;
try expect(!@subWithOverflow(u8, a, b, &result));
try expect(result == 0);
var a: u8 = 1;
var b: u8 = 2;
try expect(@subWithOverflow(u8, a, b, &result));
try expect(result == 255);
b = 1;
try expect(!@subWithOverflow(u8, a, b, &result));
try expect(result == 0);
}
{
var a: usize = 6;
var b: usize = 6;
var res: usize = undefined;
try expect(!@subWithOverflow(usize, a, b, &res));
try expect(res == 0);
}
{
var a: isize = -6;
var b: isize = -6;
var res: isize = undefined;
try expect(!@subWithOverflow(isize, a, b, &res));
try expect(res == 0);
}
}
test "@shlWithOverflow" {

View File

@ -903,3 +903,123 @@ test "multiplication-assignment operator with an array operand" {
try S.doTheTest();
comptime try S.doTheTest();
}
test "@addWithOverflow" {
    // stage1 cannot pass vectors to builtin functions; skip it outright.
    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

    const S = struct {
        fn doTheTest() !void {
            // Unsigned lanes: overflow flag is set per-lane where 250 + rhs wraps past 255.
            {
                var res: @Vector(4, u8) = undefined;
                var ov = @addWithOverflow(@Vector(4, u8), @Vector(4, u8){ 250, 250, 250, 250 }, @Vector(4, u8){ 0, 5, 6, 10 }, &res);
                var want: @Vector(4, bool) = .{ false, false, true, true };
                try expect(mem.eql(bool, &@as([4]bool, ov), &@as([4]bool, want)));
            }
            // Signed lanes: overflow in both the negative and positive direction.
            {
                var res: @Vector(4, i8) = undefined;
                var ov = @addWithOverflow(@Vector(4, i8), @Vector(4, i8){ -125, -125, 125, 125 }, @Vector(4, i8){ -3, -4, 2, 3 }, &res);
                var want: @Vector(4, bool) = .{ false, true, false, true };
                try expect(mem.eql(bool, &@as([4]bool, ov), &@as([4]bool, want)));
            }
            // Degenerate 1-bit integers: only 1 + 1 overflows.
            {
                var res: @Vector(4, u1) = undefined;
                var ov = @addWithOverflow(@Vector(4, u1), @Vector(4, u1){ 0, 0, 1, 1 }, @Vector(4, u1){ 0, 1, 0, 1 }, &res);
                var want: @Vector(4, bool) = .{ false, false, false, true };
                try expect(mem.eql(bool, &@as([4]bool, ov), &@as([4]bool, want)));
            }
            // Zero-bit integers can never overflow.
            {
                var res: @Vector(4, u0) = undefined;
                var ov = @addWithOverflow(@Vector(4, u0), @Vector(4, u0){ 0, 0, 0, 0 }, @Vector(4, u0){ 0, 0, 0, 0 }, &res);
                var want: @Vector(4, bool) = .{ false, false, false, false };
                try expect(mem.eql(bool, &@as([4]bool, ov), &@as([4]bool, want)));
            }
        }
    };
    // Exercise both the runtime and the comptime evaluation paths.
    try S.doTheTest();
    comptime try S.doTheTest();
}
test "@subWithOverflow" {
    // stage1 cannot pass vectors to builtin functions; skip it outright.
    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

    const S = struct {
        fn doTheTest() !void {
            // Unsigned lanes: 5 - 6 wraps below zero in the second lane only.
            {
                var res: @Vector(2, u8) = undefined;
                var ov = @subWithOverflow(@Vector(2, u8), @Vector(2, u8){ 5, 5 }, @Vector(2, u8){ 5, 6 }, &res);
                var want: @Vector(2, bool) = .{ false, true };
                try expect(mem.eql(bool, &@as([2]bool, ov), &@as([2]bool, want)));
            }
            // Signed lanes: overflow past both i8 bounds (-128 and 127).
            {
                var res: @Vector(4, i8) = undefined;
                var ov = @subWithOverflow(@Vector(4, i8), @Vector(4, i8){ -120, -120, 120, 120 }, @Vector(4, i8){ 8, 9, -7, -8 }, &res);
                var want: @Vector(4, bool) = .{ false, true, false, true };
                try expect(mem.eql(bool, &@as([4]bool, ov), &@as([4]bool, want)));
            }
        }
    };
    // Exercise both the runtime and the comptime evaluation paths.
    try S.doTheTest();
    comptime try S.doTheTest();
}
test "@mulWithOverflow" {
    // stage1 cannot pass vectors to builtin functions; skip it outright.
    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

    const S = struct {
        fn doTheTest() !void {
            // 10 * {25, 26, 0, 30}: products 260 and 300 exceed u8, 250 and 0 do not.
            var res: @Vector(4, u8) = undefined;
            var ov = @mulWithOverflow(@Vector(4, u8), @Vector(4, u8){ 10, 10, 10, 10 }, @Vector(4, u8){ 25, 26, 0, 30 }, &res);
            var want: @Vector(4, bool) = .{ false, true, false, true };
            try expect(mem.eql(bool, &@as([4]bool, ov), &@as([4]bool, want)));
        }
    };
    // Exercise both the runtime and the comptime evaluation paths.
    try S.doTheTest();
    comptime try S.doTheTest();
}
test "@shlWithOverflow" {
    // stage1 cannot pass vectors to builtin functions; skip it outright.
    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

    const S = struct {
        fn doTheTest() !void {
            // {0, 1, 8, 255} << 7: lanes 8 and 255 lose set bits past the u8 width.
            var res: @Vector(4, u8) = undefined;
            var ov = @shlWithOverflow(@Vector(4, u8), @Vector(4, u8){ 0, 1, 8, 255 }, @Vector(4, u3){ 7, 7, 7, 7 }, &res);
            var want: @Vector(4, bool) = .{ false, false, true, true };
            try expect(mem.eql(bool, &@as([4]bool, ov), &@as([4]bool, want)));
        }
    };
    // Exercise both the runtime and the comptime evaluation paths.
    try S.doTheTest();
    comptime try S.doTheTest();
}

View File

@ -20,5 +20,5 @@ fn assert(ok: bool) void {
}
// run
// target=arm-linux,x86_64-linux,x86_64-macos,wasm32-wasi
// target=x86_64-linux,x86_64-macos,wasm32-wasi
//