Sema: remove all_vector_instructions logic

Backends can instead request legalization on a per-instruction basis.
Jacob Young 2025-05-30 12:13:18 -04:00
parent b4a0a082dc
commit 6198f7afb7
15 changed files with 222 additions and 195 deletions
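
The mechanism replacing all_vector_instructions is visible in the diffs below: every backend now exports a legalizeFeatures declaration, and the Legalize pass consults individual feature flags per AIR instruction. A minimal sketch of a backend opting in (hedged: only the two flags shown are taken from this commit, the import path is illustrative, and unlisted flags are assumed to default to false):

const std = @import("std");
const Air = @import("Air.zig"); // illustrative path

// A backend with no native vector shift/bitcast support asks the
// legalizer to rewrite those instructions before codegen sees them.
pub fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
    return comptime &.init(.{
        .scalarize_bitcast = true, // split vector bitcasts into per-lane bitcasts
        .unsplat_shift_rhs = true, // (shift lhs, (splat rhs)) -> (shift lhs, rhs)
    });
}

Backends that lower everything natively return comptime &.initEmpty() instead, as most files below do.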

View File

@@ -42,6 +42,7 @@ pub const Feature = enum {
scalarize_shl_sat,
scalarize_xor,
scalarize_not,
scalarize_bitcast,
scalarize_clz,
scalarize_ctz,
scalarize_popcount,
@@ -76,7 +77,7 @@ pub const Feature = enum {
scalarize_mul_add,
/// Legalize (shift lhs, (splat rhs)) -> (shift lhs, rhs)
remove_shift_vector_rhs_splat,
unsplat_shift_rhs,
/// Legalize reduce of a one element vector to a bitcast
reduce_one_elem_to_bitcast,
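
The reduce_one_elem_to_bitcast rewrite documented above can be pictured at the language level; a hedged sketch in ordinary Zig (the actual rewrite operates on AIR):

const std = @import("std");

test "reduce of a one-element vector is just its lane" {
    const v: @Vector(1, u32) = .{42};
    // Reducing a single lane yields that lane, which is exactly what
    // reinterpreting the vector's 32 bits as a u32 produces.
    try std.testing.expectEqual(@as(u32, 42), @reduce(.Max, v));
    try std.testing.expectEqual(@as(u32, 42), @as(u32, @bitCast(v)));
}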
@@ -121,6 +122,7 @@ pub const Feature = enum {
.shl_sat => .scalarize_shl_sat,
.xor => .scalarize_xor,
.not => .scalarize_not,
.bitcast => .scalarize_bitcast,
.clz => .scalarize_clz,
.ctz => .scalarize_ctz,
.popcount => .scalarize_popcount,
@@ -259,9 +261,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
=> |air_tag| done: {
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
if (!l.typeOf(bin_op.rhs).isVector(zcu)) break :done;
if (l.features.contains(comptime .scalarize(air_tag))) {
continue :inst try l.scalarize(inst, .bin_op);
} else if (l.features.contains(.remove_shift_vector_rhs_splat)) {
if (l.features.contains(.unsplat_shift_rhs)) {
if (bin_op.rhs.toInterned()) |rhs_ip_index| switch (ip.indexToKey(rhs_ip_index)) {
else => {},
.aggregate => |aggregate| switch (aggregate.storage) {
@@ -282,6 +282,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
}
}
}
if (l.features.contains(comptime .scalarize(air_tag))) continue :inst try l.scalarize(inst, .bin_op);
},
inline .not,
.clz,
@@ -302,8 +303,14 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
},
.bitcast,
=> {},
inline .bitcast,
=> |air_tag| if (l.features.contains(comptime .scalarize(air_tag))) {
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
const to_ty = ty_op.ty.toType();
const from_ty = l.typeOf(ty_op.operand);
if (to_ty.isVector(zcu) and from_ty.isVector(zcu) and to_ty.vectorLen(zcu) == from_ty.vectorLen(zcu))
continue :inst try l.scalarize(inst, .ty_op);
},
.block,
.loop,
=> {

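The new bitcast arm in this hunk only scalarizes when both sides are vectors of equal length. Element-wise, the rewrite is equivalent to this user-level sketch (assumed semantics, not the actual AIR rewrite):

const std = @import("std");

fn scalarizedBitcast(v: @Vector(4, u32)) @Vector(4, f32) {
    // One bitcast per lane, mirroring what scalarize_bitcast emits.
    var out: @Vector(4, f32) = undefined;
    inline for (0..4) |i| out[i] = @as(f32, @bitCast(v[i]));
    return out;
}

test scalarizedBitcast {
    const bits: u32 = @bitCast(@as(f32, 1.0));
    const v: @Vector(4, u32) = @splat(bits);
    try std.testing.expectEqual(@as(f32, 1.0), scalarizedBitcast(v)[0]);
}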
View File

@@ -10165,16 +10165,7 @@ fn zirIntFromPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!
try sema.requireRuntimeBlock(block, block.nodeOffset(inst_data.src_node), ptr_src);
try sema.validateRuntimeValue(block, ptr_src, operand);
try sema.checkLogicalPtrOperation(block, ptr_src, ptr_ty);
if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
return block.addBitCast(dest_ty, operand);
}
const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
for (new_elems, 0..) |*new_elem, i| {
const idx_ref = try pt.intRef(.usize, i);
const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
new_elem.* = try block.addBitCast(.usize, old_elem);
}
return block.addAggregateInit(dest_ty, new_elems);
return block.addBitCast(dest_ty, operand);
}
fn zirFieldVal(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -10640,17 +10631,7 @@ fn zirFloatCast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A
if (dst_bits >= src_bits) {
return sema.coerce(block, dest_ty, operand, operand_src);
}
if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
return block.addTyOp(.fptrunc, dest_ty, operand);
}
const vec_len = operand_ty.vectorLen(zcu);
const new_elems = try sema.arena.alloc(Air.Inst.Ref, vec_len);
for (new_elems, 0..) |*new_elem, i| {
const idx_ref = try pt.intRef(.usize, i);
const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
new_elem.* = try block.addTyOp(.fptrunc, dest_scalar_ty, old_elem);
}
return block.addAggregateInit(dest_ty, new_elems);
return block.addTyOp(.fptrunc, dest_ty, operand);
}
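
With the feature check gone, zirFloatCast emits a single fptrunc even for vectors; narrowing a float vector is lane-wise at the language level. A small sketch:

const std = @import("std");

test "float narrowing applies per lane" {
    const wide: @Vector(2, f64) = .{ 1.5, -2.25 }; // exactly representable in f32
    const narrow: @Vector(2, f32) = @floatCast(wide);
    try std.testing.expectEqual(@Vector(2, f32){ 1.5, -2.25 }, narrow);
}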
fn zirElemVal(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -20722,16 +20703,7 @@ fn zirIntFromBool(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
.storage = .{ .elems = new_elems },
} }));
}
if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
return block.addBitCast(dest_ty, operand);
}
const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
for (new_elems, 0..) |*new_elem, i| {
const idx_ref = try pt.intRef(.usize, i);
const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
new_elem.* = try block.addBitCast(.u1, old_elem);
}
return block.addAggregateInit(dest_ty, new_elems);
return block.addBitCast(dest_ty, operand);
}
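
Likewise, @intFromBool on a vector is representationally a bitcast: each bool lane and each u1 lane is a single bit. A sketch:

const std = @import("std");

test "vector intFromBool is a lane-wise reinterpretation" {
    const b: @Vector(4, bool) = .{ true, false, true, true };
    const bits: @Vector(4, u1) = @intFromBool(b);
    try std.testing.expectEqual(@Vector(4, u1){ 1, 0, 1, 1 }, bits);
}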
fn zirErrorName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -22327,42 +22299,23 @@ fn zirIntFromFloat(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
.storage = .{ .repeated_elem = (try pt.intValue(dest_scalar_ty, 0)).toIntern() },
} }));
}
if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
const result = try block.addTyOp(if (block.float_mode == .optimized) .int_from_float_optimized else .int_from_float, dest_ty, operand);
if (block.wantSafety()) {
const back = try block.addTyOp(.float_from_int, operand_ty, result);
const diff = try block.addBinOp(if (block.float_mode == .optimized) .sub_optimized else .sub, operand, back);
const ok = if (is_vector) ok: {
const ok_pos = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, 1.0))).toIntern()), .lt);
const ok_neg = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, -1.0))).toIntern()), .gt);
const ok = try block.addBinOp(.bit_and, ok_pos, ok_neg);
break :ok try block.addReduce(ok, .And);
} else ok: {
const ok_pos = try block.addBinOp(if (block.float_mode == .optimized) .cmp_lt_optimized else .cmp_lt, diff, Air.internedToRef((try pt.floatValue(operand_ty, 1.0)).toIntern()));
const ok_neg = try block.addBinOp(if (block.float_mode == .optimized) .cmp_gt_optimized else .cmp_gt, diff, Air.internedToRef((try pt.floatValue(operand_ty, -1.0)).toIntern()));
break :ok try block.addBinOp(.bool_and, ok_pos, ok_neg);
};
try sema.addSafetyCheck(block, src, ok, .integer_part_out_of_bounds);
}
return result;
const result = try block.addTyOp(if (block.float_mode == .optimized) .int_from_float_optimized else .int_from_float, dest_ty, operand);
if (block.wantSafety()) {
const back = try block.addTyOp(.float_from_int, operand_ty, result);
const diff = try block.addBinOp(if (block.float_mode == .optimized) .sub_optimized else .sub, operand, back);
const ok = if (is_vector) ok: {
const ok_pos = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, 1.0))).toIntern()), .lt);
const ok_neg = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, -1.0))).toIntern()), .gt);
const ok = try block.addBinOp(.bit_and, ok_pos, ok_neg);
break :ok try block.addReduce(ok, .And);
} else ok: {
const ok_pos = try block.addBinOp(if (block.float_mode == .optimized) .cmp_lt_optimized else .cmp_lt, diff, Air.internedToRef((try pt.floatValue(operand_ty, 1.0)).toIntern()));
const ok_neg = try block.addBinOp(if (block.float_mode == .optimized) .cmp_gt_optimized else .cmp_gt, diff, Air.internedToRef((try pt.floatValue(operand_ty, -1.0)).toIntern()));
break :ok try block.addBinOp(.bool_and, ok_pos, ok_neg);
};
try sema.addSafetyCheck(block, src, ok, .integer_part_out_of_bounds);
}
const len = dest_ty.vectorLen(zcu);
const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
for (new_elems, 0..) |*new_elem, i| {
const idx_ref = try pt.intRef(.usize, i);
const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
const result = try block.addTyOp(if (block.float_mode == .optimized) .int_from_float_optimized else .int_from_float, dest_scalar_ty, old_elem);
if (block.wantSafety()) {
const back = try block.addTyOp(.float_from_int, operand_scalar_ty, result);
const diff = try block.addBinOp(.sub, old_elem, back);
const ok_pos = try block.addBinOp(if (block.float_mode == .optimized) .cmp_lt_optimized else .cmp_lt, diff, Air.internedToRef((try pt.floatValue(operand_scalar_ty, 1.0)).toIntern()));
const ok_neg = try block.addBinOp(if (block.float_mode == .optimized) .cmp_gt_optimized else .cmp_gt, diff, Air.internedToRef((try pt.floatValue(operand_scalar_ty, -1.0)).toIntern()));
const ok = try block.addBinOp(.bool_and, ok_pos, ok_neg);
try sema.addSafetyCheck(block, src, ok, .integer_part_out_of_bounds);
}
new_elem.* = result;
}
return block.addAggregateInit(dest_ty, new_elems);
return result;
}
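
The retained safety check converts the integer result back to float and requires the difference from the operand to lie strictly inside (-1.0, 1.0); anything further off means the integer part was out of bounds. A hedged model of that condition (std.math.lossyCast stands in for the backend's raw int_from_float; the real check is emitted as AIR, not via these std functions):

const std = @import("std");

fn intFromFloatOk(comptime I: type, x: f64) bool {
    const result = std.math.lossyCast(I, x); // stand-in for int_from_float
    const back: f64 = @floatFromInt(result); // float_from_int
    const diff = x - back;
    return diff < 1.0 and diff > -1.0;
}

test intFromFloatOk {
    try std.testing.expect(intFromFloatOk(u8, 200.5)); // in range
    try std.testing.expect(!intFromFloatOk(u8, 300.0)); // would trip the check
}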
fn zirFloatFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -22377,7 +22330,6 @@ fn zirFloatFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
const operand_ty = sema.typeOf(operand);
try sema.checkVectorizableBinaryOperands(block, operand_src, dest_ty, operand_ty, src, operand_src);
const is_vector = dest_ty.zigTypeTag(zcu) == .vector;
const dest_scalar_ty = dest_ty.scalarType(zcu);
const operand_scalar_ty = operand_ty.scalarType(zcu);
@@ -22393,17 +22345,7 @@ fn zirFloatFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
}
try sema.requireRuntimeBlock(block, src, operand_src);
if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
return block.addTyOp(.float_from_int, dest_ty, operand);
}
const len = operand_ty.vectorLen(zcu);
const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
for (new_elems, 0..) |*new_elem, i| {
const idx_ref = try pt.intRef(.usize, i);
const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
new_elem.* = try block.addTyOp(.float_from_int, dest_scalar_ty, old_elem);
}
return block.addAggregateInit(dest_ty, new_elems);
return block.addTyOp(.float_from_int, dest_ty, operand);
}
fn zirPtrFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -22473,69 +22415,34 @@ fn zirPtrFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!
}
try sema.requireRuntimeBlock(block, src, operand_src);
try sema.checkLogicalPtrOperation(block, src, ptr_ty);
if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
if (block.wantSafety() and (try elem_ty.hasRuntimeBitsSema(pt) or elem_ty.zigTypeTag(zcu) == .@"fn")) {
if (!ptr_ty.isAllowzeroPtr(zcu)) {
const is_non_zero = if (is_vector) all_non_zero: {
const zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
const is_non_zero = try block.addCmpVector(operand_coerced, zero_usize, .neq);
break :all_non_zero try block.addReduce(is_non_zero, .And);
} else try block.addBinOp(.cmp_neq, operand_coerced, .zero_usize);
try sema.addSafetyCheck(block, src, is_non_zero, .cast_to_null);
}
if (ptr_align.compare(.gt, .@"1")) {
const align_bytes_minus_1 = ptr_align.toByteUnits().? - 1;
const align_mask = Air.internedToRef((try sema.splat(operand_ty, try pt.intValue(
.usize,
if (elem_ty.fnPtrMaskOrNull(zcu)) |mask|
align_bytes_minus_1 & mask
else
align_bytes_minus_1,
))).toIntern());
const remainder = try block.addBinOp(.bit_and, operand_coerced, align_mask);
const is_aligned = if (is_vector) all_aligned: {
const splat_zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
const is_aligned = try block.addCmpVector(remainder, splat_zero_usize, .eq);
break :all_aligned try block.addReduce(is_aligned, .And);
} else try block.addBinOp(.cmp_eq, remainder, .zero_usize);
try sema.addSafetyCheck(block, src, is_aligned, .incorrect_alignment);
}
}
return block.addBitCast(dest_ty, operand_coerced);
}
const len = dest_ty.vectorLen(zcu);
if (block.wantSafety() and (try elem_ty.hasRuntimeBitsSema(pt) or elem_ty.zigTypeTag(zcu) == .@"fn")) {
for (0..len) |i| {
const idx_ref = try pt.intRef(.usize, i);
const elem_coerced = try block.addBinOp(.array_elem_val, operand_coerced, idx_ref);
if (!ptr_ty.isAllowzeroPtr(zcu)) {
const is_non_zero = try block.addBinOp(.cmp_neq, elem_coerced, .zero_usize);
try sema.addSafetyCheck(block, src, is_non_zero, .cast_to_null);
}
if (ptr_align.compare(.gt, .@"1")) {
const align_bytes_minus_1 = ptr_align.toByteUnits().? - 1;
const align_mask = Air.internedToRef((try pt.intValue(
.usize,
if (elem_ty.fnPtrMaskOrNull(zcu)) |mask|
align_bytes_minus_1 & mask
else
align_bytes_minus_1,
)).toIntern());
const remainder = try block.addBinOp(.bit_and, elem_coerced, align_mask);
const is_aligned = try block.addBinOp(.cmp_eq, remainder, .zero_usize);
try sema.addSafetyCheck(block, src, is_aligned, .incorrect_alignment);
}
if (!ptr_ty.isAllowzeroPtr(zcu)) {
const is_non_zero = if (is_vector) all_non_zero: {
const zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
const is_non_zero = try block.addCmpVector(operand_coerced, zero_usize, .neq);
break :all_non_zero try block.addReduce(is_non_zero, .And);
} else try block.addBinOp(.cmp_neq, operand_coerced, .zero_usize);
try sema.addSafetyCheck(block, src, is_non_zero, .cast_to_null);
}
if (ptr_align.compare(.gt, .@"1")) {
const align_bytes_minus_1 = ptr_align.toByteUnits().? - 1;
const align_mask = Air.internedToRef((try sema.splat(operand_ty, try pt.intValue(
.usize,
if (elem_ty.fnPtrMaskOrNull(zcu)) |mask|
align_bytes_minus_1 & mask
else
align_bytes_minus_1,
))).toIntern());
const remainder = try block.addBinOp(.bit_and, operand_coerced, align_mask);
const is_aligned = if (is_vector) all_aligned: {
const splat_zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
const is_aligned = try block.addCmpVector(remainder, splat_zero_usize, .eq);
break :all_aligned try block.addReduce(is_aligned, .And);
} else try block.addBinOp(.cmp_eq, remainder, .zero_usize);
try sema.addSafetyCheck(block, src, is_aligned, .incorrect_alignment);
}
}
const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
for (new_elems, 0..) |*new_elem, i| {
const idx_ref = try pt.intRef(.usize, i);
const old_elem = try block.addBinOp(.array_elem_val, operand_coerced, idx_ref);
new_elem.* = try block.addBitCast(ptr_ty, old_elem);
}
return block.addAggregateInit(dest_ty, new_elems);
return block.addBitCast(dest_ty, operand_coerced);
}
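
The pointer safety checks kept above reduce to two scalar conditions per lane: the address is non-zero (unless the pointer is allowzero), and the address masked with (alignment - 1), optionally intersected with the function-pointer mask, is zero. The alignment condition in isolation:

const std = @import("std");

fn isAligned(addr: usize, comptime alignment: usize) bool {
    comptime std.debug.assert(std.math.isPowerOfTwo(alignment));
    const align_mask = alignment - 1;
    return addr & align_mask == 0; // the "remainder == 0" check in the AIR above
}

test isAligned {
    try std.testing.expect(isAligned(0x1000, 8));
    try std.testing.expect(!isAligned(0x1001, 2));
}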
fn ptrFromIntVal(

View File

@@ -3840,15 +3840,6 @@ pub const Feature = enum {
safety_checked_instructions,
/// If the backend supports running from another thread.
separate_thread,
/// If the backend supports the following AIR instructions with vector types:
/// * `Air.Inst.Tag.bit_and`
/// * `Air.Inst.Tag.bit_or`
/// * `Air.Inst.Tag.bitcast`
/// * `Air.Inst.Tag.float_from_int`
/// * `Air.Inst.Tag.fptrunc`
/// * `Air.Inst.Tag.int_from_float`
/// If not supported, Sema will scalarize the operation.
all_vector_instructions,
};
pub fn backendSupportsFeature(zcu: *const Zcu, comptime feature: Feature) bool {

View File

@@ -40,6 +40,10 @@ const gp = abi.RegisterClass.gp;
const InnerError = CodeGenError || error{OutOfRegisters};
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
gpa: Allocator,
pt: Zcu.PerThread,
air: Air,
@@ -2261,12 +2265,13 @@ fn shiftExact(
rhs_ty: Type,
maybe_inst: ?Air.Inst.Index,
) InnerError!MCValue {
_ = rhs_ty;
const pt = self.pt;
const zcu = pt.zcu;
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO binary operations on vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO vector shift with scalar rhs", .{})
else
return self.fail("TODO binary operations on vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 64) {
@@ -2317,7 +2322,10 @@ fn shiftNormal(
const pt = self.pt;
const zcu = pt.zcu;
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO binary operations on vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO vector shift with scalar rhs", .{})
else
return self.fail("TODO binary operations on vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 64) {
@@ -2874,7 +2882,10 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!void {
const overflow_bit_offset = @as(u32, @intCast(tuple_ty.structFieldOffset(1, zcu)));
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO implement shl_with_overflow for vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO implement vector shl_with_overflow with scalar rhs", .{})
else
return self.fail("TODO implement shl_with_overflow for vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 64) {
@@ -2993,8 +3004,14 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!void {
}
fn airShlSat(self: *Self, inst: Air.Inst.Index) InnerError!void {
const zcu = self.pt.zcu;
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
const result: MCValue = if (self.liveness.isUnused(inst))
.dead
else if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
return self.fail("TODO implement vector shl_sat with scalar rhs for {}", .{self.target.cpu.arch})
else
return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}

View File

@@ -41,6 +41,10 @@ const gp = abi.RegisterClass.gp;
const InnerError = CodeGenError || error{OutOfRegisters};
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
gpa: Allocator,
pt: Zcu.PerThread,
air: Air,
@@ -1857,7 +1861,10 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
const overflow_bit_offset: u32 = @intCast(tuple_ty.structFieldOffset(1, zcu));
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO implement shl_with_overflow for vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO implement vector shl_with_overflow with scalar rhs", .{})
else
return self.fail("TODO implement shl_with_overflow for vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 32) {
@@ -1978,8 +1985,14 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
}
fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
const zcu = self.pt.zcu;
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
const result: MCValue = if (self.liveness.isUnused(inst))
.dead
else if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
return self.fail("TODO implement vector shl_sat with scalar rhs for {}", .{self.target.cpu.arch})
else
return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
@@ -3788,7 +3801,10 @@ fn shiftExact(
const pt = self.pt;
const zcu = pt.zcu;
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO ARM binary operations on vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO ARM vector shift with scalar rhs", .{})
else
return self.fail("TODO ARM binary operations on vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 32) {
@@ -3828,7 +3844,10 @@ fn shiftNormal(
const pt = self.pt;
const zcu = pt.zcu;
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO ARM binary operations on vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO ARM vector shift with scalar rhs", .{})
else
return self.fail("TODO ARM binary operations on vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 32) {

View File

@@ -10,6 +10,10 @@ const Zcu = @import("../../Zcu.zig");
const assert = std.debug.assert;
const log = std.log.scoped(.codegen);
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
pub fn generate(
bin_file: *link.File,
pt: Zcu.PerThread,

View File

@@ -51,6 +51,10 @@ const Instruction = encoding.Instruction;
const InnerError = CodeGenError || error{OutOfRegisters};
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
pt: Zcu.PerThread,
air: Air,
liveness: Air.Liveness,
@@ -2764,6 +2768,7 @@ fn genBinOp(
.shl,
.shl_exact,
=> {
if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) return func.fail("TODO: vector shift with scalar rhs", .{});
if (bit_size > 64) return func.fail("TODO: genBinOp shift > 64 bits, {}", .{bit_size});
try func.truncateRegister(rhs_ty, rhs_reg);
@@ -3248,8 +3253,14 @@ fn airMulWithOverflow(func: *Func, inst: Air.Inst.Index) !void {
}
fn airShlWithOverflow(func: *Func, inst: Air.Inst.Index) !void {
const zcu = func.pt.zcu;
const bin_op = func.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
const result: MCValue = if (func.liveness.isUnused(inst)) .unreach else return func.fail("TODO implement airShlWithOverflow", .{});
const result: MCValue = if (func.liveness.isUnused(inst))
.unreach
else if (func.typeOf(bin_op.lhs).isVector(zcu) and !func.typeOf(bin_op.rhs).isVector(zcu))
return func.fail("TODO implement vector airShlWithOverflow with scalar rhs", .{})
else
return func.fail("TODO implement airShlWithOverflow", .{});
return func.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
@@ -3266,8 +3277,14 @@ fn airMulSat(func: *Func, inst: Air.Inst.Index) !void {
}
fn airShlSat(func: *Func, inst: Air.Inst.Index) !void {
const zcu = func.pt.zcu;
const bin_op = func.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
const result: MCValue = if (func.liveness.isUnused(inst)) .unreach else return func.fail("TODO implement airShlSat", .{});
const result: MCValue = if (func.liveness.isUnused(inst))
.unreach
else if (func.typeOf(bin_op.lhs).isVector(zcu) and !func.typeOf(bin_op.rhs).isVector(zcu))
return func.fail("TODO implement vector airShlSat with scalar rhs", .{})
else
return func.fail("TODO implement airShlSat", .{});
return func.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}

View File

@@ -41,6 +41,10 @@ const Self = @This();
const InnerError = CodeGenError || error{OutOfRegisters};
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
const RegisterView = enum(u1) {
caller,
callee,
@@ -2270,8 +2274,14 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
}
fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
const zcu = self.pt.zcu;
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
const result: MCValue = if (self.liveness.isUnused(inst))
.dead
else if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
return self.fail("TODO implement vector shl_sat with scalar rhs for {}", .{self.target.cpu.arch})
else
return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
@@ -2287,7 +2297,10 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
const rhs_ty = self.typeOf(extra.rhs);
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO implement vector shl_with_overflow with scalar rhs", .{})
else
return self.fail("TODO implement mul_with_overflow for vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 64) {
@@ -3002,7 +3015,10 @@ fn binOp(
// Truncate if necessary
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO binary operations on vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO vector shift with scalar rhs", .{})
else
return self.fail("TODO binary operations on vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 64) {
@@ -3024,7 +3040,10 @@
.shr_exact,
=> {
switch (lhs_ty.zigTypeTag(zcu)) {
.vector => return self.fail("TODO binary operations on vectors", .{}),
.vector => if (!rhs_ty.isVector(zcu))
return self.fail("TODO vector shift with scalar rhs", .{})
else
return self.fail("TODO binary operations on vectors", .{}),
.int => {
const int_info = lhs_ty.intInfo(zcu);
if (int_info.bits <= 64) {

View File

@@ -31,6 +31,10 @@ const libcFloatSuffix = target_util.libcFloatSuffix;
const compilerRtFloatAbbrev = target_util.compilerRtFloatAbbrev;
const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
/// Reference to the function declaration the code
/// section belongs to
owner_nav: InternPool.Nav.Index,
@@ -2638,6 +2642,10 @@ fn airBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
// For big integers we can ignore this as we will call into compiler-rt which handles this.
const result = switch (op) {
.shr, .shl => result: {
if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) {
return cg.fail("TODO: implement vector '{s}' with scalar rhs", .{@tagName(op)});
}
const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse {
return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)});
};
@@ -3055,8 +3063,12 @@ fn airWrapBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
const lhs_ty = cg.typeOf(bin_op.lhs);
const rhs_ty = cg.typeOf(bin_op.rhs);
if (lhs_ty.zigTypeTag(zcu) == .vector or rhs_ty.zigTypeTag(zcu) == .vector) {
return cg.fail("TODO: Implement wrapping arithmetic for vectors", .{});
if (lhs_ty.isVector(zcu)) {
if ((op == .shr or op == .shl) and !rhs_ty.isVector(zcu)) {
return cg.fail("TODO: implement wrapping vector '{s}' with scalar rhs", .{@tagName(op)});
} else {
return cg.fail("TODO: implement wrapping '{s}' for vectors", .{@tagName(op)});
}
}
// For certain operations, such as shifting, the types are different.
@@ -6067,13 +6079,17 @@ fn airShlWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
const ty = cg.typeOf(extra.lhs);
const rhs_ty = cg.typeOf(extra.rhs);
if (ty.zigTypeTag(zcu) == .vector) {
return cg.fail("TODO: Implement overflow arithmetic for vectors", .{});
if (ty.isVector(zcu)) {
if (!rhs_ty.isVector(zcu)) {
return cg.fail("TODO: implement vector 'shl_with_overflow' with scalar rhs", .{});
} else {
return cg.fail("TODO: implement vector 'shl_with_overflow'", .{});
}
}
const int_info = ty.intInfo(zcu);
const wasm_bits = toWasmBits(int_info.bits) orelse {
return cg.fail("TODO: Implement shl_with_overflow for integer bitsize: {d}", .{int_info.bits});
return cg.fail("TODO: implement 'shl_with_overflow' for integer bitsize: {d}", .{int_info.bits});
};
// Ensure rhs is coerced to lhs as they must have the same WebAssembly types
@@ -6994,6 +7010,11 @@ fn airShlSat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
const pt = cg.pt;
const zcu = pt.zcu;
if (cg.typeOf(bin_op.lhs).isVector(zcu) and !cg.typeOf(bin_op.rhs).isVector(zcu)) {
return cg.fail("TODO: implement vector 'shl_sat' with scalar rhs", .{});
}
const ty = cg.typeOfIndex(inst);
const int_info = ty.intInfo(zcu);
const is_signed = int_info.signedness == .signed;

View File

@@ -32,7 +32,7 @@ const FrameIndex = bits.FrameIndex;
const InnerError = codegen.CodeGenError || error{OutOfRegisters};
pub inline fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features {
pub fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features {
@setEvalBranchQuota(1_200);
return switch (target.ofmt == .coff) {
inline false, true => |use_old| comptime &.init(.{
@@ -86,7 +86,7 @@ pub inline fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Fe
.scalarize_float_from_int = use_old,
.scalarize_mul_add = use_old,
.remove_shift_vector_rhs_splat = false,
.unsplat_shift_rhs = false,
.reduce_one_elem_to_bitcast = true,
}),
};

View File

@@ -52,7 +52,7 @@ fn importBackend(comptime backend: std.builtin.CompilerBackend) type {
pub fn legalizeFeatures(pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) *const Air.Legalize.Features {
const zcu = pt.zcu;
const target = &zcu.navFileScope(nav_index).mod.?.resolved_target.result;
switch (target_util.zigBackend(target.*, zcu.comp.config.use_llvm)) {
return switch (target_util.zigBackend(target.*, zcu.comp.config.use_llvm)) {
else => unreachable,
inline .stage2_llvm,
.stage2_c,
@@ -65,11 +65,8 @@ pub fn legalizeFeatures(pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) *con
.stage2_sparc64,
.stage2_spirv64,
.stage2_powerpc,
=> |backend| {
const Backend = importBackend(backend);
return if (@hasDecl(Backend, "legalizeFeatures")) Backend.legalizeFeatures(target) else comptime &.initEmpty();
},
}
=> |backend| importBackend(backend).legalizeFeatures(target),
};
}
pub fn generateFunction(

View File

@@ -20,6 +20,10 @@ const Alignment = InternPool.Alignment;
const BigIntLimb = std.math.big.Limb;
const BigInt = std.math.big.int;
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
pub const CType = @import("c/Type.zig");
pub const CValue = union(enum) {
@@ -4179,7 +4183,7 @@ fn airOverflow(f: *Function, inst: Air.Inst.Index, operation: []const u8, info:
try v.elem(f, w);
try w.writeAll(", ");
try f.writeCValue(w, rhs, .FunctionArgument);
try v.elem(f, w);
if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, w);
try f.object.dg.renderBuiltinInfo(w, scalar_ty, info);
try w.writeAll(");\n");
try v.end(f, inst, w);
@@ -6536,7 +6540,7 @@ fn airBinBuiltinCall(
try v.elem(f, writer);
try writer.writeAll(", ");
try f.writeCValue(writer, rhs, .FunctionArgument);
try v.elem(f, writer);
if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, writer);
try f.object.dg.renderBuiltinInfo(writer, scalar_ty, info);
try writer.writeAll(");\n");
try v.end(f, inst, writer);

View File

@@ -36,6 +36,10 @@ const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;
const Error = error{ OutOfMemory, CodegenFail };
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
fn subArchName(features: std.Target.Cpu.Feature.Set, arch: anytype, mappings: anytype) ?[]const u8 {
inline for (mappings) |mapping| {
if (arch.featureSetHas(features, mapping[0])) return mapping[1];
@@ -8923,6 +8927,8 @@ pub const FuncGen = struct {
const rhs = try self.resolveInst(extra.rhs);
const lhs_ty = self.typeOf(extra.lhs);
if (lhs_ty.isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu))
return self.ng.todo("implement vector shifts with scalar rhs", .{});
const lhs_scalar_ty = lhs_ty.scalarType(zcu);
const dest_ty = self.typeOfIndex(inst);
@@ -8992,6 +8998,8 @@ pub const FuncGen = struct {
const rhs = try self.resolveInst(bin_op.rhs);
const lhs_ty = self.typeOf(bin_op.lhs);
if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
return self.ng.todo("implement vector shifts with scalar rhs", .{});
const lhs_scalar_ty = lhs_ty.scalarType(zcu);
const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
@@ -9003,14 +9011,17 @@ pub const FuncGen = struct {
fn airShl(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = self.ng.object;
const zcu = o.pt.zcu;
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
const lhs_type = self.typeOf(bin_op.lhs);
const lhs_ty = self.typeOf(bin_op.lhs);
if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
return self.ng.todo("implement vector shifts with scalar rhs", .{});
const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_type), "");
const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
return self.wip.bin(.shl, lhs, casted_rhs, "");
}
@@ -9029,6 +9040,8 @@ pub const FuncGen = struct {
const llvm_lhs_scalar_ty = llvm_lhs_ty.scalarType(&o.builder);
const rhs_ty = self.typeOf(bin_op.rhs);
if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu))
return self.ng.todo("implement vector shifts with scalar rhs", .{});
const rhs_info = rhs_ty.intInfo(zcu);
assert(rhs_info.signedness == .unsigned);
const llvm_rhs_ty = try o.lowerType(rhs_ty);
@@ -9101,6 +9114,8 @@ pub const FuncGen = struct {
const rhs = try self.resolveInst(bin_op.rhs);
const lhs_ty = self.typeOf(bin_op.lhs);
if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
return self.ng.todo("implement vector shifts with scalar rhs", .{});
const lhs_scalar_ty = lhs_ty.scalarType(zcu);
const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
@@ -9255,8 +9270,6 @@ pub const FuncGen = struct {
const operand_ty = self.typeOf(ty_op.operand);
const dest_ty = self.typeOfIndex(inst);
const target = zcu.getTarget();
const dest_bits = dest_ty.floatBits(target);
const src_bits = operand_ty.floatBits(target);
if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
return self.wip.cast(.fptrunc, operand, try o.lowerType(dest_ty), "");
@@ -9264,6 +9277,8 @@ pub const FuncGen = struct {
const operand_llvm_ty = try o.lowerType(operand_ty);
const dest_llvm_ty = try o.lowerType(dest_ty);
const dest_bits = dest_ty.floatBits(target);
const src_bits = operand_ty.floatBits(target);
const fn_name = try o.builder.strtabStringFmt("__trunc{s}f{s}f2", .{
compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
});
@@ -9348,11 +9363,12 @@ pub const FuncGen = struct {
return self.wip.conv(.unsigned, operand, llvm_dest_ty, "");
}
if (operand_ty.zigTypeTag(zcu) == .int and inst_ty.isPtrAtRuntime(zcu)) {
const operand_scalar_ty = operand_ty.scalarType(zcu);
const inst_scalar_ty = inst_ty.scalarType(zcu);
if (operand_scalar_ty.zigTypeTag(zcu) == .int and inst_scalar_ty.isPtrAtRuntime(zcu)) {
return self.wip.cast(.inttoptr, operand, llvm_dest_ty, "");
}
if (operand_ty.isPtrAtRuntime(zcu) and inst_ty.zigTypeTag(zcu) == .int) {
if (operand_scalar_ty.isPtrAtRuntime(zcu) and inst_scalar_ty.zigTypeTag(zcu) == .int) {
return self.wip.cast(.ptrtoint, operand, llvm_dest_ty, "");
}

View File

@@ -28,6 +28,10 @@ const SpvAssembler = @import("spirv/Assembler.zig");
const InstMap = std.AutoHashMapUnmanaged(Air.Inst.Index, IdRef);
pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
return comptime &.initEmpty();
}
pub const zig_call_abi_ver = 3;
pub const big_int_bits = 32;
@@ -3380,6 +3384,10 @@ const NavGen = struct {
const zcu = self.pt.zcu;
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu)) {
return self.fail("vector shift with scalar rhs", .{});
}
const base = try self.temporary(bin_op.lhs);
const shift = try self.temporary(bin_op.rhs);
@@ -3866,6 +3874,10 @@ const NavGen = struct {
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
if (self.typeOf(extra.lhs).isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu)) {
return self.fail("vector shift with scalar rhs", .{});
}
const base = try self.temporary(extra.lhs);
const shift = try self.temporary(extra.rhs);

View File

@@ -850,9 +850,5 @@ pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, compt
.stage2_llvm => false,
else => true,
},
.all_vector_instructions => switch (backend) {
.stage2_x86_64 => true,
else => false,
},
};
}