From fca5f3602d697bd3de6a36d4504703693133144c Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Tue, 6 May 2025 16:25:08 +0330 Subject: [PATCH 1/6] spirv: unroll all vector operations --- src/codegen/spirv.zig | 413 ++++++++--------------------------- src/codegen/spirv/Module.zig | 2 - 2 files changed, 87 insertions(+), 328 deletions(-) diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index 177bef0158..66fd40d8de 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -344,8 +344,7 @@ const NavGen = struct { /// This structure is used to return information about a type typically used for /// arithmetic operations. These types may either be integers, floats, or a vector - /// of these. Most scalar operations also work on vectors, so we can easily represent - /// those as arithmetic types. If the type is a scalar, 'inner type' refers to the + /// of these. If the type is a scalar, 'inner type' refers to the /// scalar type. Otherwise, if its a vector, it refers to the vector's element type. const ArithmeticTypeInfo = struct { /// A classification of the inner type. @@ -615,41 +614,6 @@ const NavGen = struct { return if (self.spv.hasFeature(.int64)) 64 else 32; } - /// Checks whether the type is "composite int", an integer consisting of multiple native integers. These are represented by - /// arrays of largestSupportedIntBits(). - /// Asserts `ty` is an integer. - fn isCompositeInt(self: *NavGen, ty: Type) bool { - return self.backingIntBits(ty) == null; - } - - /// Checks whether the type can be directly translated to SPIR-V vectors - fn isSpvVector(self: *NavGen, ty: Type) bool { - const zcu = self.pt.zcu; - if (ty.zigTypeTag(zcu) != .vector) return false; - - // TODO: This check must be expanded for types that can be represented - // as integers (enums / packed structs?) and types that are represented - // by multiple SPIR-V values. - const scalar_ty = ty.scalarType(zcu); - switch (scalar_ty.zigTypeTag(zcu)) { - .bool, - .int, - .float, - => {}, - else => return false, - } - - const elem_ty = ty.childType(zcu); - const len = ty.vectorLen(zcu); - - if (elem_ty.isNumeric(zcu) or elem_ty.toIntern() == .bool_type) { - if (len > 1 and len <= 4) return true; - if (self.spv.hasFeature(.vector16)) return (len == 8 or len == 16); - } - - return false; - } - fn arithmeticTypeInfo(self: *NavGen, ty: Type) ArithmeticTypeInfo { const zcu = self.pt.zcu; const target = self.spv.target; @@ -659,14 +623,14 @@ const NavGen = struct { } const vector_len = if (ty.isVector(zcu)) ty.vectorLen(zcu) else null; return switch (scalar_ty.zigTypeTag(zcu)) { - .bool => ArithmeticTypeInfo{ + .bool => .{ .bits = 1, // Doesn't matter for this class. .backing_bits = self.backingIntBits(1).?, .vector_len = vector_len, .signedness = .unsigned, // Technically, but doesn't matter for this class. .class = .bool, }, - .float => ArithmeticTypeInfo{ + .float => .{ .bits = scalar_ty.floatBits(target), .backing_bits = scalar_ty.floatBits(target), // TODO: F80? .vector_len = vector_len, @@ -677,16 +641,16 @@ const NavGen = struct { const int_info = scalar_ty.intInfo(zcu); // TODO: Maybe it's useful to also return this value. 
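            // (For example, with only the native 8/16/32/64-bit widths available,
            // a u12 is classified as a strange integer backed by 16 bits, while a
            // u32 is a plain integer.)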
const maybe_backing_bits = self.backingIntBits(int_info.bits); - break :blk ArithmeticTypeInfo{ + break :blk .{ .bits = int_info.bits, .backing_bits = maybe_backing_bits orelse 0, .vector_len = vector_len, .signedness = int_info.signedness, .class = if (maybe_backing_bits) |backing_bits| if (backing_bits == int_info.bits) - ArithmeticTypeInfo.Class.integer + .integer else - ArithmeticTypeInfo.Class.strange_integer + .strange_integer else .composite_integer, }; @@ -1338,19 +1302,6 @@ const NavGen = struct { return self.spv.functionType(return_ty_id, param_ids); } - fn zigScalarOrVectorTypeLike(self: *NavGen, new_ty: Type, base_ty: Type) !Type { - const pt = self.pt; - const new_scalar_ty = new_ty.scalarType(pt.zcu); - if (!base_ty.isVector(pt.zcu)) { - return new_scalar_ty; - } - - return try pt.vectorType(.{ - .len = base_ty.vectorLen(pt.zcu), - .child = new_scalar_ty.toIntern(), - }); - } - /// Generate a union type. Union types are always generated with the /// most aligned field active. If the tag alignment is greater /// than that of the payload, a regular union (non-packed, with both tag and @@ -1632,12 +1583,7 @@ const NavGen = struct { const elem_ty = ty.childType(zcu); const elem_ty_id = try self.resolveType(elem_ty, repr); const len = ty.vectorLen(zcu); - - if (self.isSpvVector(ty)) { - return try self.spv.vectorType(len, elem_ty_id); - } else { - return try self.arrayType(len, elem_ty_id); - } + return self.arrayType(len, elem_ty_id); }, .@"struct" => { const struct_type = switch (ip.indexToKey(ty.toIntern())) { @@ -2035,69 +1981,32 @@ const NavGen = struct { const Vectorization = union(enum) { /// This is an operation between scalars. scalar, - /// This is an operation between SPIR-V vectors. - /// Value is number of components. - spv_vectorized: u32, /// This operation is unrolled into separate operations. /// Inputs may still be SPIR-V vectors, for example, /// when the operation can't be vectorized in SPIR-V. /// Value is number of components. unrolled: u32, - /// Derive a vectorization from a particular type. This usually - /// only checks the size, but the source-of-truth is implemented - /// by `isSpvVector()`. 
+ /// Derive a vectorization from a particular type fn fromType(ty: Type, ng: *NavGen) Vectorization { const zcu = ng.pt.zcu; - if (!ty.isVector(zcu)) { - return .scalar; - } else if (ng.isSpvVector(ty)) { - return .{ .spv_vectorized = ty.vectorLen(zcu) }; - } else { - return .{ .unrolled = ty.vectorLen(zcu) }; - } + if (!ty.isVector(zcu)) return .scalar; + return .{ .unrolled = ty.vectorLen(zcu) }; } /// Given two vectorization methods, compute a "unification": a fallback /// that works for both, according to the following rules: /// - Scalars may broadcast - /// - SPIR-V vectorized operations may unroll - /// - Prefer scalar > SPIR-V vectorized > unrolled + /// - SPIR-V vectorized operations will unroll + /// - Prefer scalar > unrolled fn unify(a: Vectorization, b: Vectorization) Vectorization { - if (a == .scalar and b == .scalar) { - return .scalar; - } else if (a == .spv_vectorized and b == .spv_vectorized) { - assert(a.components() == b.components()); - return .{ .spv_vectorized = a.components() }; - } else if (a == .unrolled or b == .unrolled) { - if (a == .unrolled and b == .unrolled) { - assert(a.components() == b.components()); - return .{ .unrolled = a.components() }; - } else if (a == .unrolled) { - return .{ .unrolled = a.components() }; - } else if (b == .unrolled) { - return .{ .unrolled = b.components() }; - } else { - unreachable; - } - } else { - if (a == .spv_vectorized) { - return .{ .spv_vectorized = a.components() }; - } else if (b == .spv_vectorized) { - return .{ .spv_vectorized = b.components() }; - } else { - unreachable; - } + if (a == .scalar and b == .scalar) return .scalar; + if (a == .unrolled or b == .unrolled) { + if (a == .unrolled and b == .unrolled) assert(a.components() == b.components()); + if (a == .unrolled) return .{ .unrolled = a.components() }; + return .{ .unrolled = b.components() }; } - } - - /// Force this vectorization to be unrolled, if its - /// an operation involving vectors. - fn unroll(self: Vectorization) Vectorization { - return switch (self) { - .scalar, .unrolled => self, - .spv_vectorized => |n| .{ .unrolled = n }, - }; + unreachable; } /// Query the number of components that inputs of this operation have. @@ -2106,35 +2015,10 @@ const NavGen = struct { fn components(self: Vectorization) u32 { return switch (self) { .scalar => 1, - .spv_vectorized => |n| n, .unrolled => |n| n, }; } - /// Query the number of operations involving this vectorization. - /// This is basically the number of components, except that SPIR-V vectorized - /// operations only need a single SPIR-V instruction. - fn operations(self: Vectorization) u32 { - return switch (self) { - .scalar, .spv_vectorized => 1, - .unrolled => |n| n, - }; - } - - /// Turns `ty` into the result-type of an individual vector operation. - /// `ty` may be a scalar or vector, it doesn't matter. - fn operationType(self: Vectorization, ng: *NavGen, ty: Type) !Type { - const pt = ng.pt; - const scalar_ty = ty.scalarType(pt.zcu); - return switch (self) { - .scalar, .unrolled => scalar_ty, - .spv_vectorized => |n| try pt.vectorType(.{ - .len = n, - .child = scalar_ty.toIntern(), - }), - }; - } - /// Turns `ty` into the result-type of the entire operation. /// `ty` may be a scalar or vector, it doesn't matter. 
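        /// For example, an operation vectorized as `.{ .unrolled = 4 }` over `f32`
        /// scalars has the result type `@Vector(4, f32)`, while a `.scalar`
        /// operation simply results in `f32`.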
fn resultType(self: Vectorization, ng: *NavGen, ty: Type) !Type { @@ -2142,10 +2026,7 @@ const NavGen = struct { const scalar_ty = ty.scalarType(pt.zcu); return switch (self) { .scalar => scalar_ty, - .unrolled, .spv_vectorized => |n| try pt.vectorType(.{ - .len = n, - .child = scalar_ty.toIntern(), - }), + .unrolled => |n| try pt.vectorType(.{ .len = n, .child = scalar_ty.toIntern() }), }; } @@ -2155,51 +2036,19 @@ const NavGen = struct { fn prepare(self: Vectorization, ng: *NavGen, tmp: Temporary) !PreparedOperand { const pt = ng.pt; const is_vector = tmp.ty.isVector(pt.zcu); - const is_spv_vector = ng.isSpvVector(tmp.ty); const value: PreparedOperand.Value = switch (tmp.value) { .singleton => |id| switch (self) { .scalar => blk: { assert(!is_vector); break :blk .{ .scalar = id }; }, - .spv_vectorized => blk: { - if (is_vector) { - assert(is_spv_vector); - break :blk .{ .spv_vectorwise = id }; - } - - // Broadcast scalar into vector. - const vector_ty = try pt.vectorType(.{ - .len = self.components(), - .child = tmp.ty.toIntern(), - }); - - const vector = try ng.constructCompositeSplat(vector_ty, id); - return .{ - .ty = vector_ty, - .value = .{ .spv_vectorwise = vector }, - }; - }, .unrolled => blk: { - if (is_vector) { - break :blk .{ .vector_exploded = try tmp.explode(ng) }; - } else { - break :blk .{ .scalar_broadcast = id }; - } + if (is_vector) break :blk .{ .vector_exploded = try tmp.explode(ng) }; + break :blk .{ .scalar_broadcast = id }; }, }, .exploded_vector => |range| switch (self) { .scalar => unreachable, - .spv_vectorized => |n| blk: { - // We can vectorize this operation, but we have an exploded vector. This can happen - // when a vectorizable operation succeeds a non-vectorizable operation. In this case, - // pack up the IDs into a SPIR-V vector. This path should not be able to be hit with - // a type that cannot do that. - assert(is_spv_vector); - assert(range.len == n); - const vec = try tmp.materialize(ng); - break :blk .{ .spv_vectorwise = vec }; - }, .unrolled => |n| blk: { assert(range.len == n); break :blk .{ .vector_exploded = range }; @@ -2216,17 +2065,14 @@ const NavGen = struct { /// Finalize the results of an operation back into a temporary. `results` is /// a list of result-ids of the operation. fn finalize(self: Vectorization, ty: Type, results: IdRange) Temporary { - assert(self.operations() == results.len); - const value: Temporary.Value = switch (self) { - .scalar, .spv_vectorized => blk: { - break :blk .{ .singleton = results.at(0) }; - }, - .unrolled => blk: { - break :blk .{ .exploded_vector = results }; + assert(self.components() == results.len); + return .{ + .ty = ty, + .value = switch (self) { + .scalar => .{ .singleton = results.at(0) }, + .unrolled => .{ .exploded_vector = results }, }, }; - - return .{ .ty = ty, .value = value }; } /// This struct represents an operand that has gone through some setup, and is @@ -2242,32 +2088,20 @@ const NavGen = struct { scalar: IdResult, /// A single scalar that is broadcasted in an unrolled operation. scalar_broadcast: IdResult, - /// A SPIR-V vector that is used in SPIR-V vectorize operation. - spv_vectorwise: IdResult, /// A vector represented by a consecutive list of IDs that is used in an unrolled operation. vector_exploded: IdRange, }; /// Query the value at a particular index of the operation. Note that - /// the index is *not* the component/lane, but the index of the *operation*. When - /// this operation is vectorized, the return value of this function is a SPIR-V vector. 
- /// See also `Vectorization.operations()`. + /// the index is *not* the component/lane, but the index of the *operation*. fn at(self: PreparedOperand, i: usize) IdResult { switch (self.value) { .scalar => |id| { assert(i == 0); return id; }, - .scalar_broadcast => |id| { - return id; - }, - .spv_vectorwise => |id| { - assert(i == 0); - return id; - }, - .vector_exploded => |range| { - return range.at(i); - }, + .scalar_broadcast => |id| return id, + .vector_exploded => |range| return range.at(i), } } }; @@ -2299,7 +2133,7 @@ const NavGen = struct { /// This function builds an OpSConvert of OpUConvert depending on the /// signedness of the types. - fn buildIntConvert(self: *NavGen, dst_ty: Type, src: Temporary) !Temporary { + fn buildConvert(self: *NavGen, dst_ty: Type, src: Temporary) !Temporary { const zcu = self.pt.zcu; const dst_ty_id = try self.resolveType(dst_ty.scalarType(zcu), .direct); @@ -2318,13 +2152,17 @@ const NavGen = struct { return src.pun(result_ty); } - const ops = v.operations(); + const ops = v.components(); const results = self.spv.allocIds(ops); - const op_result_ty = try v.operationType(self, dst_ty); + const op_result_ty = dst_ty.scalarType(zcu); const op_result_ty_id = try self.resolveType(op_result_ty, .direct); - const opcode: Opcode = if (dst_ty.isSignedInt(zcu)) .OpSConvert else .OpUConvert; + const opcode: Opcode = blk: { + if (dst_ty.scalarType(zcu).isAnyFloat()) break :blk .OpFConvert; + if (dst_ty.scalarType(zcu).isSignedInt(zcu)) break :blk .OpSConvert; + break :blk .OpUConvert; + }; const op_src = try v.prepare(self, src); @@ -2339,13 +2177,14 @@ const NavGen = struct { } fn buildFma(self: *NavGen, a: Temporary, b: Temporary, c: Temporary) !Temporary { + const zcu = self.pt.zcu; const target = self.spv.target; const v = self.vectorization(.{ a, b, c }); - const ops = v.operations(); + const ops = v.components(); const results = self.spv.allocIds(ops); - const op_result_ty = try v.operationType(self, a.ty); + const op_result_ty = a.ty.scalarType(zcu); const op_result_ty_id = try self.resolveType(op_result_ty, .direct); const result_ty = try v.resultType(self, a.ty); @@ -2382,10 +2221,10 @@ const NavGen = struct { const zcu = self.pt.zcu; const v = self.vectorization(.{ condition, lhs, rhs }); - const ops = v.operations(); + const ops = v.components(); const results = self.spv.allocIds(ops); - const op_result_ty = try v.operationType(self, lhs.ty); + const op_result_ty = lhs.ty.scalarType(zcu); const op_result_ty_id = try self.resolveType(op_result_ty, .direct); const result_ty = try v.resultType(self, lhs.ty); @@ -2431,10 +2270,10 @@ const NavGen = struct { fn buildCmp(self: *NavGen, pred: CmpPredicate, lhs: Temporary, rhs: Temporary) !Temporary { const v = self.vectorization(.{ lhs, rhs }); - const ops = v.operations(); + const ops = v.components(); const results = self.spv.allocIds(ops); - const op_result_ty = try v.operationType(self, Type.bool); + const op_result_ty: Type = .bool; const op_result_ty_id = try self.resolveType(op_result_ty, .direct); const result_ty = try v.resultType(self, Type.bool); @@ -2498,22 +2337,12 @@ const NavGen = struct { }; fn buildUnary(self: *NavGen, op: UnaryOp, operand: Temporary) !Temporary { + const zcu = self.pt.zcu; const target = self.spv.target; - const v = blk: { - const v = self.vectorization(.{operand}); - break :blk switch (op) { - // TODO: These instructions don't seem to be working - // properly for LLVM-based backends on OpenCL for 8- and - // 16-component vectors. 
- .i_abs => if (self.spv.hasFeature(.vector16) and v.components() >= 8) v.unroll() else v, - else => v, - }; - }; - - const ops = v.operations(); + const v = self.vectorization(.{operand}); + const ops = v.components(); const results = self.spv.allocIds(ops); - - const op_result_ty = try v.operationType(self, operand.ty); + const op_result_ty = operand.ty.scalarType(zcu); const op_result_ty_id = try self.resolveType(op_result_ty, .direct); const result_ty = try v.resultType(self, operand.ty); @@ -2628,13 +2457,14 @@ const NavGen = struct { }; fn buildBinary(self: *NavGen, op: BinaryOp, lhs: Temporary, rhs: Temporary) !Temporary { + const zcu = self.pt.zcu; const target = self.spv.target; const v = self.vectorization(.{ lhs, rhs }); - const ops = v.operations(); + const ops = v.components(); const results = self.spv.allocIds(ops); - const op_result_ty = try v.operationType(self, lhs.ty); + const op_result_ty = lhs.ty.scalarType(zcu); const op_result_ty_id = try self.resolveType(op_result_ty, .direct); const result_ty = try v.resultType(self, lhs.ty); @@ -2730,9 +2560,9 @@ const NavGen = struct { const ip = &zcu.intern_pool; const v = lhs.vectorization(self).unify(rhs.vectorization(self)); - const ops = v.operations(); + const ops = v.components(); - const arith_op_ty = try v.operationType(self, lhs.ty); + const arith_op_ty = lhs.ty.scalarType(zcu); const arith_op_ty_id = try self.resolveType(arith_op_ty, .direct); const lhs_op = try v.prepare(self, lhs); @@ -3175,17 +3005,18 @@ const NavGen = struct { /// Convert representation from indirect (in memory) to direct (in 'register') /// This converts the argument type from resolveType(ty, .indirect) to resolveType(ty, .direct). fn convertToDirect(self: *NavGen, ty: Type, operand_id: IdRef) !IdRef { - const zcu = self.pt.zcu; + const pt = self.pt; + const zcu = pt.zcu; switch (ty.scalarType(zcu).zigTypeTag(zcu)) { .bool => { const false_id = try self.constBool(false, .indirect); - // The operation below requires inputs in direct representation, but the operand - // is actually in indirect representation. - // Cheekily swap out the type to the direct equivalent of the indirect type here, they have the - // same representation when converted to SPIR-V. - const operand_ty = try self.zigScalarOrVectorTypeLike(Type.u1, ty); - // Note: We can guarantee that these are the same ID due to the SPIR-V Module's `vector_types` cache! - assert(try self.resolveType(operand_ty, .direct) == try self.resolveType(ty, .indirect)); + const operand_ty = blk: { + if (!ty.isVector(pt.zcu)) break :blk Type.u1; + break :blk try pt.vectorType(.{ + .len = ty.vectorLen(pt.zcu), + .child = Type.u1.toIntern(), + }); + }; const result = try self.buildCmp( .i_ne, @@ -3226,7 +3057,6 @@ const NavGen = struct { } fn extractVectorComponent(self: *NavGen, result_ty: Type, vector_id: IdRef, field: u32) !IdRef { - // Whether this is an OpTypeVector or OpTypeArray, we need to emit the same instruction regardless. const result_ty_id = try self.resolveType(result_ty, .direct); const result_id = self.spv.allocId(); const indexes = [_]u32{field}; @@ -3485,7 +3315,7 @@ const NavGen = struct { // Note: The sign may differ here between the shift and the base type, in case // of an arithmetic right shift. SPIR-V still expects the same type, // so in that case we have to cast convert to signed. 
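        // (e.g. for an `i32 >> u5`, the `u5` shift amount is first converted to
        // `i32` so that both operands of the SPIR-V shift share a single type.)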
- const casted_shift = try self.buildIntConvert(base.ty.scalarType(zcu), shift); + const casted_shift = try self.buildConvert(base.ty.scalarType(zcu), shift); const shifted = switch (info.signedness) { .unsigned => try self.buildBinary(unsigned, base, casted_shift), @@ -3815,12 +3645,12 @@ const NavGen = struct { .unsigned => blk: { if (maybe_op_ty_bits) |op_ty_bits| { const op_ty = try pt.intType(.unsigned, op_ty_bits); - const casted_lhs = try self.buildIntConvert(op_ty, lhs); - const casted_rhs = try self.buildIntConvert(op_ty, rhs); + const casted_lhs = try self.buildConvert(op_ty, lhs); + const casted_rhs = try self.buildConvert(op_ty, rhs); const full_result = try self.buildBinary(.i_mul, casted_lhs, casted_rhs); - const low_bits = try self.buildIntConvert(lhs.ty, full_result); + const low_bits = try self.buildConvert(lhs.ty, full_result); const result = try self.normalize(low_bits, info); // Shift the result bits away to get the overflow bits. @@ -3846,9 +3676,7 @@ const NavGen = struct { const high_overflowed = try self.buildCmp(.i_ne, zero, high_bits); // If no overflow bits in low_bits, no extra work needs to be done. - if (info.backing_bits == info.bits) { - break :blk .{ result, high_overflowed }; - } + if (info.backing_bits == info.bits) break :blk .{ result, high_overflowed }; // Shift the result bits away to get the overflow bits. const shift = Temporary.init(lhs.ty, try self.constInt(lhs.ty, info.bits)); @@ -3886,13 +3714,13 @@ const NavGen = struct { if (maybe_op_ty_bits) |op_ty_bits| { const op_ty = try pt.intType(.signed, op_ty_bits); // Assume normalized; sign bit is set. We want a sign extend. - const casted_lhs = try self.buildIntConvert(op_ty, lhs); - const casted_rhs = try self.buildIntConvert(op_ty, rhs); + const casted_lhs = try self.buildConvert(op_ty, lhs); + const casted_rhs = try self.buildConvert(op_ty, rhs); const full_result = try self.buildBinary(.i_mul, casted_lhs, casted_rhs); // Truncate to the result type. - const low_bits = try self.buildIntConvert(lhs.ty, full_result); + const low_bits = try self.buildConvert(lhs.ty, full_result); const result = try self.normalize(low_bits, info); // Now, we need to check the overflow bits AND the sign @@ -3929,9 +3757,7 @@ const NavGen = struct { // If no overflow bits in low_bits, no extra work needs to be done. // Careful, we still have to check the sign bit, so this branch // only goes for i33 and such. - if (info.backing_bits == info.bits + 1) { - break :blk .{ result, high_overflowed }; - } + if (info.backing_bits == info.bits + 1) break :blk .{ result, high_overflowed }; // Shift the result bits away to get the overflow bits. const shift = Temporary.init(lhs.ty, try self.constInt(lhs.ty, info.bits - 1)); @@ -3972,7 +3798,7 @@ const NavGen = struct { // Sometimes Zig doesn't make both of the arguments the same types here. SPIR-V expects that, // so just manually upcast it if required. - const casted_shift = try self.buildIntConvert(base.ty.scalarType(zcu), shift); + const casted_shift = try self.buildConvert(base.ty.scalarType(zcu), shift); const left = try self.buildBinary(.sll, base, casted_shift); const result = try self.normalize(left, info); @@ -4026,7 +3852,7 @@ const NavGen = struct { // Result of OpenCL ctz/clz returns operand.ty, and we want result_ty. // result_ty is always large enough to hold the result, so we might have to down // cast it. 
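        // (e.g. `@clz` on a `u64` produces a `u7` in Zig, while the OpenCL builtin
        // returns a `u64`, so the wide count is narrowed to the Zig result type.)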
- const result = try self.buildIntConvert(scalar_result_ty, count); + const result = try self.buildConvert(scalar_result_ty, count); return try result.materialize(self); } @@ -4057,11 +3883,8 @@ const NavGen = struct { const operand_ty = self.typeOf(reduce.operand); const scalar_ty = operand_ty.scalarType(zcu); const scalar_ty_id = try self.resolveType(scalar_ty, .direct); - const info = self.arithmeticTypeInfo(operand_ty); - const len = operand_ty.vectorLen(zcu); - const first = try self.extractVectorComponent(scalar_ty, operand, 0); switch (reduce.operation) { @@ -4136,51 +3959,9 @@ const NavGen = struct { // Note: number of components in the result, a, and b may differ. const result_ty = self.typeOfIndex(inst); - const a_ty = self.typeOf(extra.a); - const b_ty = self.typeOf(extra.b); - const scalar_ty = result_ty.scalarType(zcu); const scalar_ty_id = try self.resolveType(scalar_ty, .direct); - // If all of the types are SPIR-V vectors, we can use OpVectorShuffle. - if (self.isSpvVector(result_ty) and self.isSpvVector(a_ty) and self.isSpvVector(b_ty)) { - // The SPIR-V shuffle instruction is similar to the Air instruction, except that the elements are - // numbered consecutively instead of using negatives. - - const components = try self.gpa.alloc(Word, result_ty.vectorLen(zcu)); - defer self.gpa.free(components); - - const a_len = a_ty.vectorLen(zcu); - - for (components, 0..) |*component, i| { - const elem = try mask.elemValue(pt, i); - if (elem.isUndef(zcu)) { - // This is explicitly valid for OpVectorShuffle, it indicates undefined. - component.* = 0xFFFF_FFFF; - continue; - } - - const index = elem.toSignedInt(zcu); - if (index >= 0) { - component.* = @intCast(index); - } else { - component.* = @intCast(~index + a_len); - } - } - - const result_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpVectorShuffle, .{ - .id_result_type = try self.resolveType(result_ty, .direct), - .id_result = result_id, - .vector_1 = a, - .vector_2 = b, - .components = components, - }); - return result_id; - } - - // Fall back to manually extracting and inserting components. - const constituents = try self.gpa.alloc(IdRef, result_ty.vectorLen(zcu)); defer self.gpa.free(constituents); @@ -4535,9 +4316,7 @@ const NavGen = struct { const dst_ty_id = try self.resolveType(dst_ty, .direct); const result_id = blk: { - if (src_ty_id == dst_ty_id) { - break :blk src_id; - } + if (src_ty_id == dst_ty_id) break :blk src_id; // TODO: Some more cases are missing here // See fn bitCast in llvm.zig @@ -4618,7 +4397,7 @@ const NavGen = struct { return try src.materialize(self); } - const converted = try self.buildIntConvert(dst_ty, src); + const converted = try self.buildConvert(dst_ty, src); // Make sure to normalize the result if shrinking. 
// Because strange ints are sign extended in their backing @@ -4698,17 +4477,10 @@ const NavGen = struct { fn airFloatCast(self: *NavGen, inst: Air.Inst.Index) !?IdRef { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const operand_id = try self.resolve(ty_op.operand); + const operand = try self.temporary(ty_op.operand); const dest_ty = self.typeOfIndex(inst); - const dest_ty_id = try self.resolveType(dest_ty, .direct); - - const result_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpFConvert, .{ - .id_result_type = dest_ty_id, - .id_result = result_id, - .float_value = operand_id, - }); - return result_id; + const result = try self.buildConvert(dest_ty, operand); + return try result.materialize(self); } fn airNot(self: *NavGen, inst: Air.Inst.Index) !?IdRef { @@ -4796,7 +4568,7 @@ const NavGen = struct { break :blk try self.bitCast(field_int_ty, field_ty, field_id); }; const shift_rhs = try self.constInt(backing_int_ty, running_bits); - const extended_int_conv = try self.buildIntConvert(backing_int_ty, .{ + const extended_int_conv = try self.buildConvert(backing_int_ty, .{ .ty = field_int_ty, .value = .{ .singleton = field_int_id }, }); @@ -5016,17 +4788,6 @@ const NavGen = struct { const array_id = try self.resolve(bin_op.lhs); const index_id = try self.resolve(bin_op.rhs); - if (self.isSpvVector(array_ty)) { - const result_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpVectorExtractDynamic, .{ - .id_result_type = try self.resolveType(elem_ty, .direct), - .id_result = result_id, - .vector = array_id, - .index = index_id, - }); - return result_id; - } - // SPIR-V doesn't have an array indexing function for some damn reason. // For now, just generate a temporary and use that. // TODO: This backend probably also should use isByRef from llvm... @@ -5173,7 +4934,7 @@ const NavGen = struct { return self.bitCast(ty, payload_ty, payload.?); } - const trunc = try self.buildIntConvert(ty, .{ .ty = payload_ty, .value = .{ .singleton = payload.? } }); + const trunc = try self.buildConvert(ty, .{ .ty = payload_ty, .value = .{ .singleton = payload.? } }); return try trunc.materialize(self); } @@ -5182,7 +4943,7 @@ const NavGen = struct { try self.convertToIndirect(payload_ty, payload.?) else try self.bitCast(payload_int_ty, payload_ty, payload.?); - const trunc = try self.buildIntConvert(ty, .{ .ty = payload_int_ty, .value = .{ .singleton = payload_int } }); + const trunc = try self.buildConvert(ty, .{ .ty = payload_int_ty, .value = .{ .singleton = payload_int } }); return try trunc.materialize(self); } @@ -5273,7 +5034,7 @@ const NavGen = struct { const result_id = blk: { if (self.backingIntBits(field_bit_size).? == self.backingIntBits(@intCast(object_ty.bitSize(zcu))).?) break :blk try self.bitCast(field_int_ty, object_ty, try masked.materialize(self)); - const trunc = try self.buildIntConvert(field_int_ty, masked); + const trunc = try self.buildConvert(field_int_ty, masked); break :blk try trunc.materialize(self); }; if (field_ty.ip_index == .bool_type) return try self.convertToDirect(.bool, result_id); @@ -5297,7 +5058,7 @@ const NavGen = struct { const result_id = blk: { if (self.backingIntBits(field_bit_size).? == self.backingIntBits(@intCast(backing_int_ty.bitSize(zcu))).?) 
break :blk try self.bitCast(int_ty, backing_int_ty, try masked.materialize(self)); - const trunc = try self.buildIntConvert(int_ty, masked); + const trunc = try self.buildConvert(int_ty, masked); break :blk try trunc.materialize(self); }; if (field_ty.ip_index == .bool_type) return try self.convertToDirect(.bool, result_id); @@ -6752,7 +6513,7 @@ const NavGen = struct { // TODO: Should we make these builtins return usize? const result_id = try self.builtin3D(Type.u64, .LocalInvocationId, dimension, 0); const tmp = Temporary.init(Type.u64, result_id); - const result = try self.buildIntConvert(Type.u32, tmp); + const result = try self.buildConvert(Type.u32, tmp); return try result.materialize(self); } @@ -6763,7 +6524,7 @@ const NavGen = struct { // TODO: Should we make these builtins return usize? const result_id = try self.builtin3D(Type.u64, .WorkgroupSize, dimension, 0); const tmp = Temporary.init(Type.u64, result_id); - const result = try self.buildIntConvert(Type.u32, tmp); + const result = try self.buildConvert(Type.u32, tmp); return try result.materialize(self); } @@ -6774,7 +6535,7 @@ const NavGen = struct { // TODO: Should we make these builtins return usize? const result_id = try self.builtin3D(Type.u64, .WorkgroupId, dimension, 0); const tmp = Temporary.init(Type.u64, result_id); - const result = try self.buildIntConvert(Type.u32, tmp); + const result = try self.buildConvert(Type.u32, tmp); return try result.materialize(self); } diff --git a/src/codegen/spirv/Module.zig b/src/codegen/spirv/Module.zig index 1aa082f6bc..8f69276e1e 100644 --- a/src/codegen/spirv/Module.zig +++ b/src/codegen/spirv/Module.zig @@ -164,8 +164,6 @@ cache: struct { void_type: ?IdRef = null, int_types: std.AutoHashMapUnmanaged(std.builtin.Type.Int, IdRef) = .empty, float_types: std.AutoHashMapUnmanaged(std.builtin.Type.Float, IdRef) = .empty, - // This cache is required so that @Vector(X, u1) in direct representation has the - // same ID as @Vector(X, bool) in indirect representation. 
    vector_types: std.AutoHashMapUnmanaged(struct { IdRef, u32 }, IdRef) = .empty,
    array_types: std.AutoHashMapUnmanaged(struct { IdRef, IdRef }, IdRef) = .empty,

From 0901328f12e7ea3d05dc1d5b4a588e595c4bc0bc Mon Sep 17 00:00:00 2001
From: Ali Cheraghi
Date: Wed, 7 May 2025 15:03:42 +0330
Subject: [PATCH 2/6] spirv: write error value in a storage buffer

---
 lib/std/Target.zig           |   2 +-
 lib/std/Target/spirv.zig     |   8 ++-
 lib/std/builtin.zig          |   1 +
 src/codegen/spirv.zig        | 113 +++++++++++++++++++----------------
 src/codegen/spirv/Module.zig |  26 ++++----
 src/target.zig               |  21 ++++---
 6 files changed, 96 insertions(+), 75 deletions(-)

diff --git a/lib/std/Target.zig b/lib/std/Target.zig
index 9148fd5fdc..bf5a6369b5 100644
--- a/lib/std/Target.zig
+++ b/lib/std/Target.zig
@@ -2014,7 +2014,7 @@ pub const Cpu = struct {
             .global, .local, .shared => is_gpu,
             .constant => is_gpu and (context == null or context == .constant),
             .param => is_nvptx,
-            .input, .output, .uniform, .push_constant, .storage_buffer => is_spirv,
+            .input, .output, .uniform, .push_constant, .storage_buffer, .physical_storage_buffer => is_spirv,
         };
     }
 };
diff --git a/lib/std/Target/spirv.zig b/lib/std/Target/spirv.zig
index a2575b2fe8..90abacdd08 100644
--- a/lib/std/Target/spirv.zig
+++ b/lib/std/Target/spirv.zig
@@ -21,6 +21,7 @@ pub const Feature = enum {
     generic_pointer,
     vector16,
     shader,
+    variable_pointers,
     physical_storage_buffer,
 };
@@ -129,6 +130,11 @@
         .description = "Enable SPV_KHR_physical_storage_buffer extension and the PhysicalStorageBufferAddresses capability",
         .dependencies = featureSet(&[_]Feature{.v1_0}),
     };
+    result[@intFromEnum(Feature.variable_pointers)] = .{
+        .llvm_name = null,
+        .description = "Enable SPV_KHR_variable_pointers extension and the (VariablePointers, VariablePointersStorageBuffer) capabilities",
+        .dependencies = featureSet(&[_]Feature{.v1_0}),
+    };
     const ti = @typeInfo(Feature);
     for (&result, 0..) |*elem, i| {
         elem.index = i;
@@ -147,7 +153,7 @@ pub const cpu = struct {
     pub const vulkan_v1_2: CpuModel = .{
         .name = "vulkan_v1_2",
         .llvm_name = null,
-        .features = featureSet(&[_]Feature{ .v1_5, .shader, .physical_storage_buffer }),
+        .features = featureSet(&[_]Feature{ .v1_5, .shader }),
     };
 
     pub const opencl_v2: CpuModel = .{
diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig
index 852b94c324..1683cc500b 100644
--- a/lib/std/builtin.zig
+++ b/lib/std/builtin.zig
@@ -531,6 +531,7 @@ pub const AddressSpace = enum(u5) {
     uniform,
     push_constant,
     storage_buffer,
+    physical_storage_buffer,
 
     // AVR address spaces.
     flash,
diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig
index 66fd40d8de..b5bba61016 100644
--- a/src/codegen/spirv.zig
+++ b/src/codegen/spirv.zig
@@ -169,12 +169,10 @@ pub const Object = struct {
     /// via the usual `intern_map` mechanism.
     ptr_types: PtrTypeMap = .{},
 
-    /// For test declarations for Vulkan, we have to add a push constant with a pointer to a
-    /// buffer that we can use. We only need to generate this once, this holds the link information
+    /// For test declarations for Vulkan, we have to add a buffer.
+    /// We only need to generate this once; this holds the link information
    /// related to that.
- error_push_constant: ?struct { - push_constant_ptr: SpvModule.Decl.Index, - } = null, + error_buffer: ?SpvModule.Decl.Index = null, pub fn init(gpa: Allocator, target: std.Target) Object { return .{ @@ -1739,15 +1737,34 @@ const NavGen = struct { fn spvStorageClass(self: *NavGen, as: std.builtin.AddressSpace) StorageClass { return switch (as) { .generic => if (self.spv.hasFeature(.generic_pointer)) .Generic else .Function, + .global => { + if (self.spv.hasFeature(.kernel)) return .CrossWorkgroup; + return .StorageBuffer; + }, + .push_constant => { + assert(self.spv.hasFeature(.shader)); + return .PushConstant; + }, + .output => { + assert(self.spv.hasFeature(.shader)); + return .Output; + }, + .uniform => { + assert(self.spv.hasFeature(.shader)); + return .Uniform; + }, + .storage_buffer => { + assert(self.spv.hasFeature(.shader)); + return .StorageBuffer; + }, + .physical_storage_buffer => { + assert(self.spv.hasFeature(.physical_storage_buffer)); + return .PhysicalStorageBuffer; + }, + .constant => .UniformConstant, .shared => .Workgroup, .local => .Function, - .global => if (self.spv.hasFeature(.shader)) .PhysicalStorageBuffer else .CrossWorkgroup, - .constant => .UniformConstant, - .push_constant => .PushConstant, .input => .Input, - .output => .Output, - .uniform => .Uniform, - .storage_buffer => .StorageBuffer, .gs, .fs, .ss, @@ -2713,38 +2730,32 @@ const NavGen = struct { }); }, .vulkan, .opengl => { - const ptr_ptr_anyerror_ty_id = self.spv.allocId(); - try self.spv.sections.types_globals_constants.emit(self.spv.gpa, .OpTypePointer, .{ - .id_result = ptr_ptr_anyerror_ty_id, - .storage_class = .PushConstant, - .type = ptr_anyerror_ty_id, - }); - - if (self.object.error_push_constant == null) { + if (self.object.error_buffer == null) { const spv_err_decl_index = try self.spv.allocDecl(.global); try self.spv.declareDeclDeps(spv_err_decl_index, &.{}); - const push_constant_struct_ty_id = self.spv.allocId(); - try self.spv.structType(push_constant_struct_ty_id, &.{ptr_anyerror_ty_id}, &.{"error_out_ptr"}); - try self.spv.decorate(push_constant_struct_ty_id, .Block); - try self.spv.decorateMember(push_constant_struct_ty_id, 0, .{ .Offset = .{ .byte_offset = 0 } }); + const buffer_struct_ty_id = self.spv.allocId(); + try self.spv.structType(buffer_struct_ty_id, &.{anyerror_ty_id}, &.{"error_out"}); + try self.spv.decorate(buffer_struct_ty_id, .Block); + try self.spv.decorateMember(buffer_struct_ty_id, 0, .{ .Offset = .{ .byte_offset = 0 } }); - const ptr_push_constant_struct_ty_id = self.spv.allocId(); + const ptr_buffer_struct_ty_id = self.spv.allocId(); try self.spv.sections.types_globals_constants.emit(self.spv.gpa, .OpTypePointer, .{ - .id_result = ptr_push_constant_struct_ty_id, - .storage_class = .PushConstant, - .type = push_constant_struct_ty_id, + .id_result = ptr_buffer_struct_ty_id, + .storage_class = self.spvStorageClass(.global), + .type = buffer_struct_ty_id, }); + const buffer_struct_id = self.spv.declPtr(spv_err_decl_index).result_id; try self.spv.sections.types_globals_constants.emit(self.spv.gpa, .OpVariable, .{ - .id_result_type = ptr_push_constant_struct_ty_id, - .id_result = self.spv.declPtr(spv_err_decl_index).result_id, - .storage_class = .PushConstant, + .id_result_type = ptr_buffer_struct_ty_id, + .id_result = buffer_struct_id, + .storage_class = self.spvStorageClass(.global), }); + try self.spv.decorate(buffer_struct_id, .{ .DescriptorSet = .{ .descriptor_set = 0 } }); + try self.spv.decorate(buffer_struct_id, .{ .Binding = .{ .binding_point = 0 } }); - 
self.object.error_push_constant = .{ - .push_constant_ptr = spv_err_decl_index, - }; + self.object.error_buffer = spv_err_decl_index; } try self.spv.sections.execution_modes.emit(self.spv.gpa, .OpExecutionMode, .{ @@ -2767,24 +2778,16 @@ const NavGen = struct { .id_result = self.spv.allocId(), }); - const spv_err_decl_index = self.object.error_push_constant.?.push_constant_ptr; - const push_constant_id = self.spv.declPtr(spv_err_decl_index).result_id; + const spv_err_decl_index = self.object.error_buffer.?; + const buffer_id = self.spv.declPtr(spv_err_decl_index).result_id; try decl_deps.append(spv_err_decl_index); const zero_id = try self.constInt(Type.u32, 0); - // We cannot use OpInBoundsAccessChain to dereference cross-storage class, so we have to use - // a load. - const tmp = self.spv.allocId(); try section.emit(self.spv.gpa, .OpInBoundsAccessChain, .{ - .id_result_type = ptr_ptr_anyerror_ty_id, - .id_result = tmp, - .base = push_constant_id, - .indexes = &.{zero_id}, - }); - try section.emit(self.spv.gpa, .OpLoad, .{ .id_result_type = ptr_anyerror_ty_id, .id_result = p_error_id, - .pointer = tmp, + .base = buffer_id, + .indexes = &.{zero_id}, }); }, else => unreachable, @@ -4562,7 +4565,8 @@ const NavGen = struct { const field_int_id = blk: { if (field_ty.isPtrAtRuntime(zcu)) { assert(self.spv.hasFeature(.addresses) or - (self.spv.hasFeature(.physical_storage_buffer) and field_ty.ptrAddressSpace(zcu) == .storage_buffer)); + (self.spv.hasFeature(.physical_storage_buffer) and + field_ty.ptrAddressSpace(zcu) == .storage_buffer)); break :blk try self.intFromPtr(field_id); } break :blk try self.bitCast(field_int_ty, field_ty, field_id); @@ -4969,13 +4973,16 @@ const NavGen = struct { if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { const pl_ptr_ty_id = try self.ptrType(layout.payload_ty, .Function, .indirect); const pl_ptr_id = try self.accessChain(pl_ptr_ty_id, tmp_id, &.{layout.payload_index}); - const active_pl_ptr_ty_id = try self.ptrType(payload_ty, .Function, .indirect); - const active_pl_ptr_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpBitcast, .{ - .id_result_type = active_pl_ptr_ty_id, - .id_result = active_pl_ptr_id, - .operand = pl_ptr_id, - }); + const active_pl_ptr_id = if (!layout.payload_ty.eql(payload_ty, zcu)) blk: { + const active_pl_ptr_ty_id = try self.ptrType(payload_ty, .Function, .indirect); + const active_pl_ptr_id = self.spv.allocId(); + try self.func.body.emit(self.spv.gpa, .OpBitcast, .{ + .id_result_type = active_pl_ptr_ty_id, + .id_result = active_pl_ptr_id, + .operand = pl_ptr_id, + }); + break :blk active_pl_ptr_id; + } else pl_ptr_id; try self.store(payload_ty, active_pl_ptr_id, payload.?, .{}); } else { diff --git a/src/codegen/spirv/Module.zig b/src/codegen/spirv/Module.zig index 8f69276e1e..16c32c26d5 100644 --- a/src/codegen/spirv/Module.zig +++ b/src/codegen/spirv/Module.zig @@ -350,6 +350,11 @@ pub fn finalize(self: *Module, a: Allocator) ![]Word { .vector16 => try self.addCapability(.Vector16), // Shader .shader => try self.addCapability(.Shader), + .variable_pointers => { + try self.addExtension("SPV_KHR_variable_pointers"); + try self.addCapability(.VariablePointersStorageBuffer); + try self.addCapability(.VariablePointers); + }, .physical_storage_buffer => { try self.addExtension("SPV_KHR_physical_storage_buffer"); try self.addCapability(.PhysicalStorageBufferAddresses); @@ -364,20 +369,17 @@ pub fn finalize(self: *Module, a: Allocator) ![]Word { // Emit memory model const addressing_model: spec.AddressingModel = blk: { 
        if (self.hasFeature(.shader)) {
-            break :blk switch (self.target.cpu.arch) {
-                .spirv32 => .Logical, // TODO: I don't think this will ever be implemented.
-                .spirv64 => .PhysicalStorageBuffer64,
-                else => unreachable,
-            };
-        } else if (self.hasFeature(.kernel)) {
-            break :blk switch (self.target.cpu.arch) {
-                .spirv32 => .Physical32,
-                .spirv64 => .Physical64,
-                else => unreachable,
-            };
+            assert(self.target.cpu.arch == .spirv64);
+            if (self.hasFeature(.physical_storage_buffer)) break :blk .PhysicalStorageBuffer64;
+            break :blk .Logical;
         }
 
-        unreachable;
+        assert(self.hasFeature(.kernel));
+        break :blk switch (self.target.cpu.arch) {
+            .spirv32 => .Physical32,
+            .spirv64 => .Physical64,
+            else => unreachable,
+        };
     };
     try self.sections.memory_model.emit(self.gpa, .OpMemoryModel, .{
         .addressing_model = addressing_model,
diff --git a/src/target.zig b/src/target.zig
index 4931b11eba..c5b2d97efb 100644
--- a/src/target.zig
+++ b/src/target.zig
@@ -501,21 +501,26 @@ pub fn addrSpaceCastIsValid(
 /// part of a merge (result of a branch) and may not be stored in memory at all. This function returns
 /// for a particular architecture and address space whether such pointers are logical.
 pub fn arePointersLogical(target: std.Target, as: AddressSpace) bool {
-    if (target.os.tag != .vulkan) {
-        return false;
-    }
+    if (target.os.tag != .vulkan) return false;
 
     return switch (as) {
         // TODO: Vulkan doesn't support pointers in the generic address space, we
         // should remove this case but this requires a change in defaultAddressSpace().
         // For now, at least disable them from being regarded as physical.
         .generic => true,
-        // For now, all global pointers are represented using PhysicalStorageBuffer, so these are real
-        // pointers.
+        // For now, all global pointers are represented using StorageBuffer or CrossWorkgroup,
+        // so these are real pointers.
         .global => false,
-        // TODO: Allowed with VK_KHR_variable_pointers.
-        .shared => true,
-        .constant, .local, .input, .output, .uniform, .push_constant, .storage_buffer => true,
+        .physical_storage_buffer => false,
+        .shared => !target.cpu.features.isEnabled(@intFromEnum(std.Target.spirv.Feature.variable_pointers)),
+        .constant,
+        .local,
+        .input,
+        .output,
+        .uniform,
+        .push_constant,
+        .storage_buffer,
+        => true,
         else => unreachable,
     };
 }

From dacd70fbe41d959bb7b48b5bad8612e74231524b Mon Sep 17 00:00:00 2001
From: Ali Cheraghi
Date: Wed, 7 May 2025 20:25:06 +0330
Subject: [PATCH 3/6] spirv: super basic composite int support

---
 src/Zcu.zig                                   |   2 +-
 src/codegen/spirv.zig                         | 162 +++++++++++-------
 src/codegen/spirv/Module.zig                  |   7 +-
 src/target.zig                                |   3 +-
 .../compile_errors/@import_zon_bad_type.zig   |   6 +-
 .../anytype_param_requires_comptime.zig       |   2 +-
 .../bogus_method_call_on_slice.zig            |   2 +-
 .../compile_errors/coerce_anon_struct.zig     |   2 +-
 test/cases/compile_errors/redundant_try.zig   |   4 +-
 test/tests.zig                                |   2 +-
 10 files changed, 120 insertions(+), 72 deletions(-)

diff --git a/src/Zcu.zig b/src/Zcu.zig
index bee7fadb95..b8118b3f0b 100644
--- a/src/Zcu.zig
+++ b/src/Zcu.zig
@@ -3693,7 +3693,7 @@ pub fn errorSetBits(zcu: *const Zcu) u16 {
     const target = zcu.getTarget();
     if (zcu.error_limit == 0) return 0;
 
-    if (target.cpu.arch == .spirv64) {
+    if (target.cpu.arch.isSpirV()) {
         if (!std.Target.spirv.featureSetHas(target.cpu.features, .storage_push_constant16)) {
             return 32;
         }
diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig
index b5bba61016..2732a0a617 100644
--- a/src/codegen/spirv.zig
+++ b/src/codegen/spirv.zig
@@ -30,6 +30,7 @@
 const SpvAssembler = @import("spirv/Assembler.zig");
 
 const InstMap = std.AutoHashMapUnmanaged(Air.Inst.Index, IdRef);
 
 pub const zig_call_abi_ver = 3;
+pub const big_int_bits = 32;
 
 const InternMap = std.AutoHashMapUnmanaged(struct { InternPool.Index, NavGen.Repr }, IdResult);
 const PtrTypeMap = std.AutoHashMapUnmanaged(
@@ -376,7 +377,7 @@ const NavGen = struct {
         /// The number of bits required to store the type.
         /// For `integer` and `float`, this is equal to `bits`.
         /// For `strange_integer` and `bool` this is the size of the backing integer.
-        /// For `composite_integer` this is 0 (TODO)
+        /// For `composite_integer` this is the total width of the backing array, in bits.
         backing_bits: u16,
 
         /// Null if this type is a scalar, or the length
@@ -579,11 +580,13 @@ const NavGen = struct {
     /// The backing type will be chosen as the smallest supported integer larger or equal to it in number of bits.
     /// The result is valid to be used with OpTypeInt.
     /// TODO: Should the result of this function be cached?
-    fn backingIntBits(self: *NavGen, bits: u16) ?u16 {
+    fn backingIntBits(self: *NavGen, bits: u16) struct { u16, bool } {
         // The backend will never be asked to compile a 0-bit integer, so we won't have to handle those in this function.
         assert(bits != 0);
 
-        if (self.spv.hasFeature(.arbitrary_precision_integers) and bits <= 32) return bits;
+        if (self.spv.hasFeature(.arbitrary_precision_integers) and bits <= 32) {
+            return .{ bits, false };
+        }
 
         // We require Int8 and Int16 capabilities and benefit from Int64 when available.
        // 32-bit integers are always supported (see spec, 2.16.1, Data rules).
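Roughly, the new tuple contract can be modeled in isolation as follows (an illustrative sketch only, not part of the patch; `backingIntBitsModel` is a hypothetical name, and it assumes Int8/Int16/Int64 are all available while `arbitrary_precision_integers` is not):

    const std = @import("std");

    fn backingIntBitsModel(bits: u16) struct { u16, bool } {
        std.debug.assert(bits != 0);
        // Pick the smallest native width that fits.
        for ([_]u16{ 8, 16, 32, 64 }) |native| {
            if (bits <= native) return .{ native, false };
        }
        // Composite ("big") integer: round up to whole 32-bit words.
        return .{ std.mem.alignForward(u16, bits, 32), true };
    }

    test backingIntBitsModel {
        const w16, const big16 = backingIntBitsModel(12); // u12 -> strange int in a u16
        try std.testing.expectEqual(@as(u16, 16), w16);
        try std.testing.expect(!big16);

        const w96, const big96 = backingIntBitsModel(65); // u65 -> three u32 words
        try std.testing.expectEqual(@as(u16, 96), w96);
        try std.testing.expect(big96);
    }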
@@ -596,10 +599,11 @@ const NavGen = struct { for (ints) |int| { const has_feature = if (int.feature) |feature| self.spv.hasFeature(feature) else true; - if (bits <= int.bits and has_feature) return int.bits; + if (bits <= int.bits and has_feature) return .{ int.bits, false }; } - return null; + // Big int + return .{ std.mem.alignForward(u16, bits, big_int_bits), true }; } /// Return the amount of bits in the largest supported integer type. This is either 32 (always supported), or 64 (if @@ -623,7 +627,7 @@ const NavGen = struct { return switch (scalar_ty.zigTypeTag(zcu)) { .bool => .{ .bits = 1, // Doesn't matter for this class. - .backing_bits = self.backingIntBits(1).?, + .backing_bits = self.backingIntBits(1).@"0", .vector_len = vector_len, .signedness = .unsigned, // Technically, but doesn't matter for this class. .class = .bool, @@ -638,19 +642,16 @@ const NavGen = struct { .int => blk: { const int_info = scalar_ty.intInfo(zcu); // TODO: Maybe it's useful to also return this value. - const maybe_backing_bits = self.backingIntBits(int_info.bits); + const backing_bits, const big_int = self.backingIntBits(int_info.bits); break :blk .{ .bits = int_info.bits, - .backing_bits = maybe_backing_bits orelse 0, + .backing_bits = backing_bits, .vector_len = vector_len, .signedness = int_info.signedness, - .class = if (maybe_backing_bits) |backing_bits| - if (backing_bits == int_info.bits) - .integer - else - .strange_integer - else - .composite_integer, + .class = class: { + if (big_int) break :class .composite_integer; + break :class if (backing_bits == int_info.bits) .integer else .strange_integer; + }, }; }, .@"enum" => unreachable, @@ -659,6 +660,34 @@ const NavGen = struct { }; } + /// Checks whether the type can be directly translated to SPIR-V vectors + fn isSpvVector(self: *NavGen, ty: Type) bool { + const zcu = self.pt.zcu; + if (ty.zigTypeTag(zcu) != .vector) return false; + + // TODO: This check must be expanded for types that can be represented + // as integers (enums / packed structs?) and types that are represented + // by multiple SPIR-V values. + const scalar_ty = ty.scalarType(zcu); + switch (scalar_ty.zigTypeTag(zcu)) { + .bool, + .int, + .float, + => {}, + else => return false, + } + + const elem_ty = ty.childType(zcu); + const len = ty.vectorLen(zcu); + + if (elem_ty.isNumeric(zcu) or elem_ty.toIntern() == .bool_type) { + if (len > 1 and len <= 4) return true; + if (self.spv.hasFeature(.vector16)) return (len == 8 or len == 16); + } + + return false; + } + /// Emits a bool constant in a particular representation. 
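    /// (In this backend a `.direct` bool is a true OpTypeBool value, while in
    /// `.indirect` representation bools are stored as `u1` integers; see
    /// `convertToDirect`.)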
    fn constBool(self: *NavGen, value: bool, repr: Repr) !IdRef {
        return switch (repr) {
@@ -675,14 +704,26 @@
         const scalar_ty = ty.scalarType(zcu);
         const int_info = scalar_ty.intInfo(zcu);
         // Use backing bits so that negatives are sign extended
-        const backing_bits = self.backingIntBits(int_info.bits).?; // Assertion failure means big int
+        const backing_bits, const big_int = self.backingIntBits(int_info.bits);
         assert(backing_bits != 0); // u0 is comptime
+        const result_ty_id = try self.resolveType(scalar_ty, .indirect);
         const signedness: Signedness = switch (@typeInfo(@TypeOf(value))) {
             .int => |int| int.signedness,
             .comptime_int => if (value < 0) .signed else .unsigned,
             else => unreachable,
         };
+        if (@sizeOf(@TypeOf(value)) >= 4 and big_int) {
+            const value64: u64 = switch (signedness) {
+                .signed => @bitCast(@as(i64, @intCast(value))),
+                .unsigned => @as(u64, @intCast(value)),
+            };
+            assert(backing_bits == 64);
+            return self.constructComposite(result_ty_id, &.{
+                try self.constInt(.u32, @as(u32, @truncate(value64))), // low word
+                try self.constInt(.u32, @as(u32, @truncate(value64 >> 32))), // high word
+            });
+        }
 
         const final_value: spec.LiteralContextDependentNumber = blk: {
             if (self.spv.hasFeature(.kernel)) {
@@ -700,18 +741,17 @@
                 break :blk switch (backing_bits) {
                     1...32 => .{ .uint32 = @truncate(truncated_value) },
                     33...64 => .{ .uint64 = truncated_value },
-                    else => unreachable, // TODO: Large integer constants
+                    else => unreachable,
                 };
             }
 
             break :blk switch (backing_bits) {
                 1...32 => if (signedness == .signed) .{ .int32 = @intCast(value) } else .{ .uint32 = @intCast(value) },
                 33...64 => if (signedness == .signed) .{ .int64 = value } else .{ .uint64 = value },
-                else => unreachable, // TODO: Large integer constants
+                else => unreachable,
             };
         };
 
-        const result_ty_id = try self.resolveType(scalar_ty, .indirect);
         const result_id = try self.spv.constant(result_ty_id, final_value);
 
         if (!ty.isVector(zcu)) return result_id;
@@ -949,7 +989,7 @@
             // TODO: composite int
             // TODO: endianness
             const bits: u16 = @intCast(ty.bitSize(zcu));
-            const bytes = std.mem.alignForward(u16, self.backingIntBits(bits).?, 8) / 8;
+            const bytes = std.mem.alignForward(u16, self.backingIntBits(bits).@"0", 8) / 8;
             var limbs: [8]u8 = undefined;
             @memset(&limbs, 0);
             val.writeToPackedMemory(ty, pt, limbs[0..bytes], 0) catch unreachable;
@@ -1068,19 +1108,11 @@
                 const parent_ptr_id = try self.derivePtr(oac.parent.*);
                 const parent_ptr_ty = try oac.parent.ptrType(pt);
                 const result_ty_id = try self.resolveType(oac.new_ptr_ty, .direct);
+                const child_size = oac.new_ptr_ty.childType(zcu).abiSize(zcu);
 
-                if (oac.byte_offset != 0) {
-                    const child_size = oac.new_ptr_ty.childType(zcu).abiSize(zcu);
-                    if (oac.byte_offset % child_size != 0) {
-                        return self.fail("cannot perform pointer cast: '{}' to '{}'", .{
-                            parent_ptr_ty.fmt(pt),
-                            oac.new_ptr_ty.fmt(pt),
-                        });
-                    }
-
+                if (parent_ptr_ty.childType(zcu).isVector(zcu) and oac.byte_offset % child_size == 0) {
                     // Vector element ptr accesses are derived as offset_and_cast.
                     // We can just use OpAccessChain.
-                    assert(parent_ptr_ty.childType(zcu).zigTypeTag(zcu) == .vector);
                     return self.accessChain(
                         result_ty_id,
                         parent_ptr_id,
@@ -1088,15 +1120,22 @@
                     );
                 }
 
-                // Allow changing the pointer type child only to restructure arrays.
-                // e.g. [3][2]T to T is fine, as is [2]T -> [2][1]T.
- const result_ptr_id = self.spv.allocId(); - try self.func.body.emit(self.spv.gpa, .OpBitcast, .{ - .id_result_type = result_ty_id, - .id_result = result_ptr_id, - .operand = parent_ptr_id, + if (oac.byte_offset == 0) { + // Allow changing the pointer type child only to restructure arrays. + // e.g. [3][2]T to T is fine, as is [2]T -> [2][1]T. + const result_ptr_id = self.spv.allocId(); + try self.func.body.emit(self.spv.gpa, .OpBitcast, .{ + .id_result_type = result_ty_id, + .id_result = result_ptr_id, + .operand = parent_ptr_id, + }); + return result_ptr_id; + } + + return self.fail("cannot perform pointer cast: '{}' to '{}'", .{ + parent_ptr_ty.fmt(pt), + oac.new_ptr_ty.fmt(pt), }); - return result_ptr_id; }, } } @@ -1217,11 +1256,14 @@ const NavGen = struct { /// actual operations (as well as store) a Zig type of a particular number of bits. To create /// a type with an exact size, use SpvModule.intType. fn intType(self: *NavGen, signedness: std.builtin.Signedness, bits: u16) !IdRef { - const backing_bits = self.backingIntBits(bits) orelse { - // TODO: Integers too big for any native type are represented as "composite integers": - // An array of largestSupportedIntBits. - return self.todo("Implement {s} composite int type of {} bits", .{ @tagName(signedness), bits }); - }; + const backing_bits, const big_int = self.backingIntBits(bits); + if (big_int) { + if (backing_bits > 64) { + return self.fail("composite integers larger than 64bit aren't supported", .{}); + } + const int_ty = try self.resolveType(.u32, .direct); + return self.arrayType(backing_bits / big_int_bits, int_ty); + } // Kernel only supports unsigned ints. if (self.spv.hasFeature(.kernel)) { @@ -1509,6 +1551,17 @@ const NavGen = struct { return result_id; } }, + .vector => { + const elem_ty = ty.childType(zcu); + const elem_ty_id = try self.resolveType(elem_ty, repr); + const len = ty.vectorLen(zcu); + + if (self.isSpvVector(ty)) { + return try self.spv.vectorType(len, elem_ty_id); + } else { + return try self.arrayType(len, elem_ty_id); + } + }, .@"fn" => switch (repr) { .direct => { const fn_info = zcu.typeToFunc(ty).?; @@ -1577,12 +1630,6 @@ const NavGen = struct { ); return result_id; }, - .vector => { - const elem_ty = ty.childType(zcu); - const elem_ty_id = try self.resolveType(elem_ty, repr); - const len = ty.vectorLen(zcu); - return self.arrayType(len, elem_ty_id); - }, .@"struct" => { const struct_type = switch (ip.indexToKey(ty.toIntern())) { .tuple_type => |tuple| { @@ -3378,8 +3425,7 @@ const NavGen = struct { const zcu = self.pt.zcu; const ty = value.ty; switch (info.class) { - .integer, .bool, .float => return value, - .composite_integer => unreachable, // TODO + .composite_integer, .integer, .bool, .float => return value, .strange_integer => switch (info.signedness) { .unsigned => { const mask_value = if (info.bits == 64) 0xFFFF_FFFF_FFFF_FFFF else (@as(u64, 1) << @as(u6, @intCast(info.bits))) - 1; @@ -5039,7 +5085,7 @@ const NavGen = struct { const mask_id = try self.constInt(object_ty, (@as(u64, 1) << @as(u6, @intCast(field_bit_size))) - 1); const masked = try self.buildBinary(.bit_and, shift, .{ .ty = object_ty, .value = .{ .singleton = mask_id } }); const result_id = blk: { - if (self.backingIntBits(field_bit_size).? == self.backingIntBits(@intCast(object_ty.bitSize(zcu))).?) 
+ if (self.backingIntBits(field_bit_size).@"0" == self.backingIntBits(@intCast(object_ty.bitSize(zcu))).@"0") break :blk try self.bitCast(field_int_ty, object_ty, try masked.materialize(self)); const trunc = try self.buildConvert(field_int_ty, masked); break :blk try trunc.materialize(self); @@ -5063,7 +5109,7 @@ const NavGen = struct { .{ .ty = backing_int_ty, .value = .{ .singleton = mask_id } }, ); const result_id = blk: { - if (self.backingIntBits(field_bit_size).? == self.backingIntBits(@intCast(backing_int_ty.bitSize(zcu))).?) + if (self.backingIntBits(field_bit_size).@"0" == self.backingIntBits(@intCast(backing_int_ty.bitSize(zcu))).@"0") break :blk try self.bitCast(int_ty, backing_int_ty, try masked.materialize(self)); const trunc = try self.buildConvert(int_ty, masked); break :blk try trunc.materialize(self); @@ -6100,17 +6146,15 @@ const NavGen = struct { .bool, .error_set => 1, .int => blk: { const bits = cond_ty.intInfo(zcu).bits; - const backing_bits = self.backingIntBits(bits) orelse { - return self.todo("implement composite int switch", .{}); - }; + const backing_bits, const big_int = self.backingIntBits(bits); + if (big_int) return self.todo("implement composite int switch", .{}); break :blk if (backing_bits <= 32) 1 else 2; }, .@"enum" => blk: { const int_ty = cond_ty.intTagType(zcu); const int_info = int_ty.intInfo(zcu); - const backing_bits = self.backingIntBits(int_info.bits) orelse { - return self.todo("implement composite int switch", .{}); - }; + const backing_bits, const big_int = self.backingIntBits(int_info.bits); + if (big_int) return self.todo("implement composite int switch", .{}); break :blk if (backing_bits <= 32) 1 else 2; }, .pointer => blk: { diff --git a/src/codegen/spirv/Module.zig b/src/codegen/spirv/Module.zig index 16c32c26d5..920215bee1 100644 --- a/src/codegen/spirv/Module.zig +++ b/src/codegen/spirv/Module.zig @@ -369,8 +369,11 @@ pub fn finalize(self: *Module, a: Allocator) ![]Word { // Emit memory model const addressing_model: spec.AddressingModel = blk: { if (self.hasFeature(.shader)) { - assert(self.target.cpu.arch == .spirv64); - if (self.hasFeature(.physical_storage_buffer)) break :blk .PhysicalStorageBuffer64; + if (self.hasFeature(.physical_storage_buffer)) { + assert(self.target.cpu.arch == .spirv64); + break :blk .PhysicalStorageBuffer64; + } + assert(self.target.cpu.arch == .spirv); break :blk .Logical; } diff --git a/src/target.zig b/src/target.zig index c5b2d97efb..6119b002a4 100644 --- a/src/target.zig +++ b/src/target.zig @@ -807,7 +807,8 @@ pub fn zigBackend(target: std.Target, use_llvm: bool) std.builtin.CompilerBacken .powerpc, .powerpcle, .powerpc64, .powerpc64le => .stage2_powerpc, .riscv64 => .stage2_riscv64, .sparc64 => .stage2_sparc64, - .spirv64 => .stage2_spirv64, + .spirv32 => if (target.os.tag == .opencl) .stage2_spirv64 else .other, + .spirv, .spirv64 => .stage2_spirv64, .wasm32, .wasm64 => .stage2_wasm, .x86 => .stage2_x86, .x86_64 => .stage2_x86_64, diff --git a/test/cases/compile_errors/@import_zon_bad_type.zig b/test/cases/compile_errors/@import_zon_bad_type.zig index 3265c6d92c..a2e13c4a6d 100644 --- a/test/cases/compile_errors/@import_zon_bad_type.zig +++ b/test/cases/compile_errors/@import_zon_bad_type.zig @@ -117,9 +117,9 @@ export fn testMutablePointer() void { // tmp.zig:37:38: note: imported here // neg_inf.zon:1:1: error: expected type '?u8' // tmp.zig:57:28: note: imported here -// neg_inf.zon:1:1: error: expected type 'tmp.testNonExhaustiveEnum__enum_499' +// neg_inf.zon:1:1: error: expected type 
'tmp.testNonExhaustiveEnum__enum_501' // tmp.zig:62:39: note: imported here -// neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_501' +// neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_503' // tmp.zig:67:44: note: imported here -// neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_504' +// neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_506' // tmp.zig:72:50: note: imported here diff --git a/test/cases/compile_errors/anytype_param_requires_comptime.zig b/test/cases/compile_errors/anytype_param_requires_comptime.zig index 3546955e23..541a49a460 100644 --- a/test/cases/compile_errors/anytype_param_requires_comptime.zig +++ b/test/cases/compile_errors/anytype_param_requires_comptime.zig @@ -15,6 +15,6 @@ pub export fn entry() void { // error // // :7:25: error: unable to resolve comptime value -// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_473.C' must be comptime-known +// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_475.C' must be comptime-known // :4:16: note: struct requires comptime because of this field // :4:16: note: types are not available at runtime diff --git a/test/cases/compile_errors/bogus_method_call_on_slice.zig b/test/cases/compile_errors/bogus_method_call_on_slice.zig index fe30379476..598c04d2c5 100644 --- a/test/cases/compile_errors/bogus_method_call_on_slice.zig +++ b/test/cases/compile_errors/bogus_method_call_on_slice.zig @@ -16,5 +16,5 @@ pub export fn entry2() void { // // :3:6: error: no field or member function named 'copy' in '[]const u8' // :9:8: error: no field or member function named 'bar' in '@TypeOf(.{})' -// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_477' +// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_479' // :12:6: note: struct declared here diff --git a/test/cases/compile_errors/coerce_anon_struct.zig b/test/cases/compile_errors/coerce_anon_struct.zig index 75e27ddbed..ec5cf966d9 100644 --- a/test/cases/compile_errors/coerce_anon_struct.zig +++ b/test/cases/compile_errors/coerce_anon_struct.zig @@ -6,6 +6,6 @@ export fn foo() void { // error // -// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_466' +// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_468' // :3:16: note: struct declared here // :1:11: note: struct declared here diff --git a/test/cases/compile_errors/redundant_try.zig b/test/cases/compile_errors/redundant_try.zig index 2a3488c413..1f44cc05dc 100644 --- a/test/cases/compile_errors/redundant_try.zig +++ b/test/cases/compile_errors/redundant_try.zig @@ -44,9 +44,9 @@ comptime { // // :5:23: error: expected error union type, found 'comptime_int' // :10:23: error: expected error union type, found '@TypeOf(.{})' -// :15:23: error: expected error union type, found 'tmp.test2__struct_503' +// :15:23: error: expected error union type, found 'tmp.test2__struct_505' // :15:23: note: struct declared here -// :20:27: error: expected error union type, found 'tmp.test3__struct_505' +// :20:27: error: expected error union type, found 'tmp.test3__struct_507' // :20:27: note: struct declared here // :25:23: error: expected error union type, found 'struct { comptime *const [5:0]u8 = "hello" }' // :31:13: error: expected error union type, found 'u32' diff --git a/test/tests.zig b/test/tests.zig index 04c89444df..cc1da4cf9f 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -145,7 +145,7 @@ const test_targets = blk: { .{ .target = 
std.Target.Query.parse(.{ .arch_os_abi = "spirv64-vulkan", - .cpu_features = "vulkan_v1_2+int64+float16+float64", + .cpu_features = "vulkan_v1_2+physical_storage_buffer+int64+float16+float64", }) catch unreachable, .use_llvm = false, .use_lld = false, From 9209f4b16acc4453f89a06caabf54691f6253f62 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Fri, 9 May 2025 18:08:23 +0330 Subject: [PATCH 4/6] spirv: recognize builtin extern vars --- src/codegen/spirv.zig | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index 2732a0a617..99f948e789 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -2972,6 +2972,46 @@ const NavGen = struct { .storage_class = storage_class, }); + if (nav.fqn.eqlSlice("position", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .Position } }); + } else if (nav.fqn.eqlSlice("point_size", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .PointSize } }); + } else if (nav.fqn.eqlSlice("vertex_id", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .VertexId } }); + } else if (nav.fqn.eqlSlice("instance_id", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .InstanceId } }); + } else if (nav.fqn.eqlSlice("invocation_id", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .InvocationId } }); + } else if (nav.fqn.eqlSlice("frag_coord", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FragCoord } }); + } else if (nav.fqn.eqlSlice("point_coord", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .PointCoord } }); + } else if (nav.fqn.eqlSlice("front_facing", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FrontFacing } }); + } else if (nav.fqn.eqlSlice("sample_mask", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .SampleMask } }); + } else if (nav.fqn.eqlSlice("sample_mask", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .SampleMask } }); + } else if (nav.fqn.eqlSlice("frag_depth", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FragDepth } }); + } else if (nav.fqn.eqlSlice("num_workgroups", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .NumWorkgroups } }); + } else if (nav.fqn.eqlSlice("workgroup_size", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .WorkgroupSize } }); + } else if (nav.fqn.eqlSlice("workgroup_id", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .WorkgroupId } }); + } else if (nav.fqn.eqlSlice("local_invocation_id", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .LocalInvocationId } }); + } else if (nav.fqn.eqlSlice("global_invocation_id", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .GlobalInvocationId } }); + } else if (nav.fqn.eqlSlice("local_invocation_index", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .LocalInvocationIndex } }); + } else if (nav.fqn.eqlSlice("vertex_index", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .VertexIndex } }); + } else if (nav.fqn.eqlSlice("instance_index", ip)) { + try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .InstanceIndex } }); + } + try self.spv.debugName(result_id, nav.fqn.toSlice(ip)); try self.spv.declareDeclDeps(spv_decl_index, &.{}); }, From 8fa54eb7987bdb8138c625f03aa9fb91239dba48 Mon Sep 17 
00:00:00 2001 From: Ali Cheraghi Date: Sun, 11 May 2025 15:45:44 +0330 Subject: [PATCH 5/6] spirv: error when execution mode is set more than once --- lib/std/gpu.zig | 230 +++++++++++++------------------- src/codegen/spirv.zig | 8 +- src/codegen/spirv/Assembler.zig | 22 ++- src/codegen/spirv/Module.zig | 46 ++++--- src/link/SpirV.zig | 5 +- 5 files changed, 144 insertions(+), 167 deletions(-) diff --git a/lib/std/gpu.zig b/lib/std/gpu.zig index d02b2424d4..d72d298b32 100644 --- a/lib/std/gpu.zig +++ b/lib/std/gpu.zig @@ -1,81 +1,24 @@ const std = @import("std.zig"); -/// Will make `ptr` contain the location of the current invocation within the -/// global workgroup. Each component is equal to the index of the local workgroup -/// multiplied by the size of the local workgroup plus `localInvocationId`. -/// `ptr` must be a reference to variable or struct field. -pub fn globalInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void { - asm volatile ( - \\OpDecorate %ptr BuiltIn GlobalInvocationId - : - : [ptr] "" (ptr), - ); -} - -/// Will make that variable contain the location of the current cluster -/// culling, task, mesh, or compute shader invocation within the local -/// workgroup. Each component ranges from zero through to the size of the -/// workgroup in that dimension minus one. -/// `ptr` must be a reference to variable or struct field. -pub fn localInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void { - asm volatile ( - \\OpDecorate %ptr BuiltIn LocalInvocationId - : - : [ptr] "" (ptr), - ); -} - -/// Output vertex position from a `Vertex` entrypoint -/// `ptr` must be a reference to variable or struct field. -pub fn position(comptime ptr: *addrspace(.output) @Vector(4, f32)) void { - asm volatile ( - \\OpDecorate %ptr BuiltIn Position - : - : [ptr] "" (ptr), - ); -} - -/// Will make `ptr` contain the index of the vertex that is -/// being processed by the current vertex shader invocation. -/// `ptr` must be a reference to variable or struct field. -pub fn vertexIndex(comptime ptr: *addrspace(.input) u32) void { - asm volatile ( - \\OpDecorate %ptr BuiltIn VertexIndex - : - : [ptr] "" (ptr), - ); -} - -/// Will make `ptr` contain the index of the instance that is -/// being processed by the current vertex shader invocation. -/// `ptr` must be a reference to variable or struct field. -pub fn instanceIndex(comptime ptr: *addrspace(.input) u32) void { - asm volatile ( - \\OpDecorate %ptr BuiltIn InstanceIndex - : - : [ptr] "" (ptr), - ); -} - -/// Output fragment depth from a `Fragment` entrypoint -/// `ptr` must be a reference to variable or struct field. -pub fn fragmentCoord(comptime ptr: *addrspace(.input) @Vector(4, f32)) void { - asm volatile ( - \\OpDecorate %ptr BuiltIn FragCoord - : - : [ptr] "" (ptr), - ); -} - -/// Output fragment depth from a `Fragment` entrypoint -/// `ptr` must be a reference to variable or struct field. 
-pub fn fragmentDepth(comptime ptr: *addrspace(.output) f32) void { - asm volatile ( - \\OpDecorate %ptr BuiltIn FragDepth - : - : [ptr] "" (ptr), - ); -} +pub const position_in = @extern(*addrspace(.input) @Vector(4, f32), .{ .name = "position" }); +pub const position_out = @extern(*addrspace(.output) @Vector(4, f32), .{ .name = "position" }); +pub const point_size_in = @extern(*addrspace(.input) f32, .{ .name = "point_size" }); +pub const point_size_out = @extern(*addrspace(.output) f32, .{ .name = "point_size" }); +pub extern const invocation_id: u32 addrspace(.input); +pub extern const frag_coord: @Vector(4, f32) addrspace(.input); +pub extern const point_coord: @Vector(2, f32) addrspace(.input); +// TODO: direct/indirect values +// pub extern const front_facing: bool addrspace(.input); +// TODO: runtime array +// pub extern const sample_mask; +pub extern var frag_depth: f32 addrspace(.output); +pub extern const num_workgroups: @Vector(3, u32) addrspace(.input); +pub extern const workgroup_size: @Vector(3, u32) addrspace(.input); +pub extern const workgroup_id: @Vector(3, u32) addrspace(.input); +pub extern const local_invocation_id: @Vector(3, u32) addrspace(.input); +pub extern const global_invocation_id: @Vector(3, u32) addrspace(.input); +pub extern const vertex_index: u32 addrspace(.input); +pub extern const instance_index: u32 addrspace(.input); /// Forms the main linkage for `input` and `output` address spaces. /// `ptr` must be a reference to variable or struct field. @@ -101,74 +44,85 @@ pub fn binding(comptime ptr: anytype, comptime set: u32, comptime bind: u32) voi ); } -pub const Origin = enum(u32) { - /// Increase toward the right and downward - upper_left = 7, - /// Increase toward the right and upward - lower_left = 8, -}; - -/// The coordinates appear to originate in the specified `origin`. -/// Only valid with the `Fragment` calling convention. -pub fn fragmentOrigin(comptime entry_point: anytype, comptime origin: Origin) void { - asm volatile ( - \\OpExecutionMode %entry_point $origin - : - : [entry_point] "" (entry_point), - [origin] "c" (@intFromEnum(origin)), - ); -} - -pub const DepthMode = enum(u32) { - /// Declares that this entry point dynamically writes the - /// `fragmentDepth` built in-decorated variable. - replacing = 12, +pub const ExecutionMode = union(Tag) { + /// Sets origin of the framebuffer to the upper-left corner + origin_upper_left, + /// Sets origin of the framebuffer to the lower-left corner + origin_lower_left, + /// Indicates that the fragment shader writes to `frag_depth`, + /// replacing the fixed-function depth value. 
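+    /// (Vulkan requires this mode for any fragment shader that writes `frag_depth`.)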
+    depth_replacing,
     /// Indicates that per-fragment tests may assume that
-    /// any `fragmentDepth` built in-decorated value written by the shader is
+    /// any `frag_depth` built-in-decorated value written by the shader is
     /// greater-than-or-equal to the fragment’s interpolated depth value
-    greater = 14,
+    depth_greater,
     /// Indicates that per-fragment tests may assume that
-    /// any `fragmentDepth` built in-decorated value written by the shader is
+    /// any `frag_depth` built-in-decorated value written by the shader is
     /// less-than-or-equal to the fragment’s interpolated depth value
-    less = 15,
+    depth_less,
     /// Indicates that per-fragment tests may assume that
-    /// any `fragmentDepth` built in-decorated value written by the shader is
+    /// any `frag_depth` built-in-decorated value written by the shader is
     /// the same as the fragment’s interpolated depth value
-    unchanged = 16,
+    depth_unchanged,
+    /// Indicates the workgroup size in the x, y, and z dimensions.
+    local_size: LocalSize,
+
+    pub const Tag = enum(u32) {
+        origin_upper_left = 7,
+        origin_lower_left = 8,
+        depth_replacing = 12,
+        depth_greater = 14,
+        depth_less = 15,
+        depth_unchanged = 16,
+        local_size = 17,
+    };
+
+    pub const LocalSize = struct { x: u32, y: u32, z: u32 };
 };
 
-/// Only valid with the `Fragment` calling convention.
-pub fn depthMode(comptime entry_point: anytype, comptime mode: DepthMode) void {
-    asm volatile (
-        \\OpExecutionMode %entry_point $mode
-        :
-        : [entry_point] "" (entry_point),
-          [mode] "c" (mode),
-    );
-}
-
-/// Indicates the workgroup size in the `x`, `y`, and `z` dimensions.
-/// Only valid with the `GLCompute` or `Kernel` calling conventions.
-pub fn workgroupSize(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void {
-    asm volatile (
-        \\OpExecutionMode %entry_point LocalSize %x %y %z
-        :
-        : [entry_point] "" (entry_point),
-          [x] "c" (size[0]),
-          [y] "c" (size[1]),
-          [z] "c" (size[2]),
-    );
-}
-
-/// A hint to the client, which indicates the workgroup size in the `x`, `y`, and `z` dimensions.
-/// Only valid with the `GLCompute` or `Kernel` calling conventions.
-pub fn workgroupSizeHint(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void {
-    asm volatile (
-        \\OpExecutionMode %entry_point LocalSizeHint %x %y %z
-        :
-        : [entry_point] "" (entry_point),
-          [x] "c" (size[0]),
-          [y] "c" (size[1]),
-          [z] "c" (size[2]),
-    );
-}
+/// Declare the mode an entry point executes in.
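+///
+/// For example, a compute kernel entry point might declare its workgroup size
+/// with `executionMode(myKernel, .{ .local_size = .{ .x = 64, .y = 1, .z = 1 } })`,
+/// where `myKernel` is a hypothetical function using the `.spirv_kernel`
+/// calling convention.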
+pub fn executionMode(comptime entry_point: anytype, comptime mode: ExecutionMode) void {
+    const cc = @typeInfo(@TypeOf(entry_point)).@"fn".calling_convention;
+    switch (mode) {
+        .origin_upper_left,
+        .origin_lower_left,
+        .depth_replacing,
+        .depth_greater,
+        .depth_less,
+        .depth_unchanged,
+        => {
+            if (cc != .spirv_fragment) {
+                @compileError("invalid execution mode '" ++ @tagName(mode) ++
+                    "' for function with '" ++ @tagName(cc) ++ "' calling convention");
+            }
+            asm volatile (
+                \\OpExecutionMode %entry_point $mode
+                :
+                : [entry_point] "" (entry_point),
+                  [mode] "c" (@intFromEnum(mode)),
+            );
+        },
+        .local_size => |size| {
+            if (cc != .spirv_kernel) {
+                @compileError("invalid execution mode 'local_size' for function with '" ++
+                    @tagName(cc) ++ "' calling convention");
+            }
+            asm volatile (
+                \\OpExecutionMode %entry_point LocalSize $x $y $z
+                :
+                : [entry_point] "" (entry_point),
+                  [x] "c" (size.x),
+                  [y] "c" (size.y),
+                  [z] "c" (size.z),
+            );
+        },
+    }
+}
diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig
index 99f948e789..b2ab76e2c7 100644
--- a/src/codegen/spirv.zig
+++ b/src/codegen/spirv.zig
@@ -2870,7 +2870,7 @@ const NavGen = struct {
     };
 
     try self.spv.declareDeclDeps(spv_decl_index, decl_deps.items);
-    try self.spv.declareEntryPoint(spv_decl_index, test_name, execution_mode);
+    try self.spv.declareEntryPoint(spv_decl_index, test_name, execution_mode, null);
 }
 
 fn genNav(self: *NavGen, do_codegen: bool) !void {
@@ -2976,10 +2976,6 @@
             try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .Position } });
         } else if (nav.fqn.eqlSlice("point_size", ip)) {
             try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .PointSize } });
-        } else if (nav.fqn.eqlSlice("vertex_id", ip)) {
-            try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .VertexId } });
-        } else if (nav.fqn.eqlSlice("instance_id", ip)) {
-            try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .InstanceId } });
         } else if (nav.fqn.eqlSlice("invocation_id", ip)) {
             try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .InvocationId } });
         } else if (nav.fqn.eqlSlice("frag_coord", ip)) {
@@ -2990,8 +2986,6 @@
             try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FrontFacing } });
         } else if (nav.fqn.eqlSlice("sample_mask", ip)) {
             try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .SampleMask } });
-        } else if (nav.fqn.eqlSlice("sample_mask", ip)) {
-            try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .SampleMask } });
         } else if (nav.fqn.eqlSlice("frag_depth", ip)) {
             try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FragDepth } });
         } else if (nav.fqn.eqlSlice("num_workgroups", ip)) {
diff --git a/src/codegen/spirv/Assembler.zig b/src/codegen/spirv/Assembler.zig
index e4ad326006..2cf336b9c4 100644
--- a/src/codegen/spirv/Assembler.zig
+++ b/src/codegen/spirv/Assembler.zig
@@ -296,12 +296,26 @@ fn processInstruction(self: *Assembler) !void {
             };
             break :blk .{ .value = try self.spv.importInstructionSet(set_tag) };
         },
+        .OpExecutionMode, .OpExecutionModeId => {
+            assert(try self.processGenericInstruction() == null);
+            const entry_point_id = try self.resolveRefId(self.inst.operands.items[0].ref_id);
+            const exec_mode: spec.ExecutionMode = @enumFromInt(self.inst.operands.items[1].value);
+            const gop = try self.spv.entry_points.getOrPut(self.gpa, entry_point_id);
+            if (!gop.found_existing) {
+                gop.value_ptr.* = .{};
+            } else if (gop.value_ptr.exec_mode != null) {
+                return self.fail(
+                    self.currentToken().start,
+                    "cannot set the execution mode of an entry point more than once",
+                    .{},
+                );
+            }
+            gop.value_ptr.exec_mode = exec_mode;
+            return;
+        },
         else => switch (self.inst.opcode.class()) {
             .TypeDeclaration => try self.processTypeInstruction(),
-            else => if (try self.processGenericInstruction()) |result|
-                result
-            else
-                return,
+            else => (try self.processGenericInstruction()) orelse return,
         },
     };
diff --git a/src/codegen/spirv/Module.zig b/src/codegen/spirv/Module.zig
index 920215bee1..691749bf1d 100644
--- a/src/codegen/spirv/Module.zig
+++ b/src/codegen/spirv/Module.zig
@@ -92,11 +92,12 @@ pub const Decl = struct {
 /// This models a kernel entry point.
 pub const EntryPoint = struct {
     /// The declaration that should be exported.
-    decl_index: Decl.Index,
+    decl_index: ?Decl.Index = null,
     /// The name of the kernel to be exported.
-    name: []const u8,
+    name: ?[]const u8 = null,
     /// Calling Convention
-    execution_model: spec.ExecutionModel,
+    exec_model: ?spec.ExecutionModel = null,
+    exec_mode: ?spec.ExecutionMode = null,
 };
 
 /// A general-purpose allocator which may be used to allocate resources for this module
@@ -184,7 +185,7 @@
 decls: std.ArrayListUnmanaged(Decl) = .empty,
 decl_deps: std.ArrayListUnmanaged(Decl.Index) = .empty,
 
 /// The list of entry points that should be exported from this module.
-entry_points: std.ArrayListUnmanaged(EntryPoint) = .empty,
+entry_points: std.AutoArrayHashMapUnmanaged(IdRef, EntryPoint) = .empty,
 
 pub fn init(gpa: Allocator, target: std.Target) Module {
     const version_minor: u8 = blk: {
@@ -304,19 +305,30 @@ fn entryPoints(self: *Module) !Section {
     var seen = try std.DynamicBitSetUnmanaged.initEmpty(self.gpa, self.decls.items.len);
     defer seen.deinit(self.gpa);
 
-    for (self.entry_points.items) |entry_point| {
+    for (self.entry_points.keys(), self.entry_points.values()) |entry_point_id, entry_point| {
         interface.items.len = 0;
         seen.setRangeValue(.{ .start = 0, .end = self.decls.items.len }, false);
 
-        try self.addEntryPointDeps(entry_point.decl_index, &seen, &interface);
-
-        const entry_point_id = self.declPtr(entry_point.decl_index).result_id;
+        try self.addEntryPointDeps(entry_point.decl_index.?, &seen, &interface);
         try entry_points.emit(self.gpa, .OpEntryPoint, .{
-            .execution_model = entry_point.execution_model,
+            .execution_model = entry_point.exec_model.?,
             .entry_point = entry_point_id,
-            .name = entry_point.name,
+            .name = entry_point.name.?,
             .interface = interface.items,
         });
+
+        if (entry_point.exec_mode == null and entry_point.exec_model == .Fragment) {
+            switch (self.target.os.tag) {
+                .vulkan, .opengl => |tag| {
+                    try self.sections.execution_modes.emit(self.gpa, .OpExecutionMode, .{
+                        .entry_point = entry_point_id,
+                        .mode = if (tag == .vulkan) .OriginUpperLeft else .OriginLowerLeft,
+                    });
+                },
+                .opencl => {},
+                else => unreachable,
+            }
+        }
     }
 
     return entry_points;
@@ -749,13 +761,15 @@
 pub fn declareEntryPoint(
     self: *Module,
     decl_index: Decl.Index,
     name: []const u8,
-    execution_model: spec.ExecutionModel,
+    exec_model: spec.ExecutionModel,
+    exec_mode: ?spec.ExecutionMode,
 ) !void {
-    try self.entry_points.append(self.gpa, .{
-        .decl_index = decl_index,
-        .name = try self.arena.allocator().dupe(u8, name),
-        .execution_model = execution_model,
-    });
+    const gop = try self.entry_points.getOrPut(self.gpa, self.declPtr(decl_index).result_id);
+    gop.value_ptr.decl_index = decl_index;
+    gop.value_ptr.name = try self.arena.allocator().dupe(u8, name);
+    gop.value_ptr.exec_model = exec_model;
+    // The execution mode might've already been set by the assembler.
+    if
(!gop.found_existing) gop.value_ptr.exec_mode = exec_mode; } pub fn debugName(self: *Module, target: IdResult, name: []const u8) !void { diff --git a/src/link/SpirV.zig b/src/link/SpirV.zig index f5e569ce69..f3c2922725 100644 --- a/src/link/SpirV.zig +++ b/src/link/SpirV.zig @@ -162,7 +162,7 @@ pub fn updateExports( if (ip.isFunctionType(nav_ty)) { const spv_decl_index = try self.object.resolveNav(zcu, nav_index); const cc = Type.fromInterned(nav_ty).fnCallingConvention(zcu); - const execution_model: spec.ExecutionModel = switch (target.os.tag) { + const exec_model: spec.ExecutionModel = switch (target.os.tag) { .vulkan, .opengl => switch (cc) { .spirv_vertex => .Vertex, .spirv_fragment => .Fragment, @@ -185,7 +185,8 @@ pub fn updateExports( try self.object.spv.declareEntryPoint( spv_decl_index, exp.opts.name.toSlice(ip), - execution_model, + exec_model, + null, ); } } From 4bf1e4d198abd2018bf23f9067617800a2bc0554 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Wed, 21 May 2025 15:26:18 +0330 Subject: [PATCH 6/6] target: auto-generated spirv features --- lib/std/Target/spirv.zig | 217 ++++++++++++++++++++-------------- tools/update_cpu_features.zig | 122 +++++++++++++++++++ 2 files changed, 253 insertions(+), 86 deletions(-) diff --git a/lib/std/Target/spirv.zig b/lib/std/Target/spirv.zig index 90abacdd08..229d77a6d6 100644 --- a/lib/std/Target/spirv.zig +++ b/lib/std/Target/spirv.zig @@ -1,8 +1,21 @@ +//! This file is auto-generated by tools/update_cpu_features.zig. + const std = @import("../std.zig"); const CpuFeature = std.Target.Cpu.Feature; const CpuModel = std.Target.Cpu.Model; pub const Feature = enum { + addresses, + arbitrary_precision_integers, + float16, + float64, + generic_pointer, + int64, + kernel, + matrix, + physical_storage_buffer, + shader, + storage_push_constant16, v1_0, v1_1, v1_2, @@ -10,19 +23,8 @@ pub const Feature = enum { v1_4, v1_5, v1_6, - int64, - float16, - float64, - matrix, - storage_push_constant16, - arbitrary_precision_integers, - kernel, - addresses, - generic_pointer, - vector16, - shader, variable_pointers, - physical_storage_buffer, + vector16, }; pub const featureSet = CpuFeature.FeatureSetFns(Feature).featureSet; @@ -35,6 +37,83 @@ pub const all_features = blk: { const len = @typeInfo(Feature).@"enum".fields.len; std.debug.assert(len <= CpuFeature.Set.needed_bit_count); var result: [len]CpuFeature = undefined; + result[@intFromEnum(Feature.addresses)] = .{ + .llvm_name = null, + .description = "Enable Addresses capability", + .dependencies = featureSet(&[_]Feature{ + .v1_0, + }), + }; + result[@intFromEnum(Feature.arbitrary_precision_integers)] = .{ + .llvm_name = null, + .description = "Enable SPV_INTEL_arbitrary_precision_integers extension and the ArbitraryPrecisionIntegersINTEL capability", + .dependencies = featureSet(&[_]Feature{ + .v1_5, + }), + }; + result[@intFromEnum(Feature.float16)] = .{ + .llvm_name = null, + .description = "Enable Float16 capability", + .dependencies = featureSet(&[_]Feature{ + .v1_0, + }), + }; + result[@intFromEnum(Feature.float64)] = .{ + .llvm_name = null, + .description = "Enable Float64 capability", + .dependencies = featureSet(&[_]Feature{ + .v1_0, + }), + }; + result[@intFromEnum(Feature.generic_pointer)] = .{ + .llvm_name = null, + .description = "Enable GenericPointer capability", + .dependencies = featureSet(&[_]Feature{ + .addresses, + }), + }; + result[@intFromEnum(Feature.int64)] = .{ + .llvm_name = null, + .description = "Enable Int64 capability", + .dependencies = featureSet(&[_]Feature{ + .v1_0, + 
+        }),
+    };
+    result[@intFromEnum(Feature.kernel)] = .{
+        .llvm_name = null,
+        .description = "Enable Kernel capability",
+        .dependencies = featureSet(&[_]Feature{
+            .v1_0,
+        }),
+    };
+    result[@intFromEnum(Feature.matrix)] = .{
+        .llvm_name = null,
+        .description = "Enable Matrix capability",
+        .dependencies = featureSet(&[_]Feature{
+            .v1_0,
+        }),
+    };
+    result[@intFromEnum(Feature.physical_storage_buffer)] = .{
+        .llvm_name = null,
+        .description = "Enable SPV_KHR_physical_storage_buffer extension and the PhysicalStorageBufferAddresses capability",
+        .dependencies = featureSet(&[_]Feature{
+            .v1_0,
+        }),
+    };
+    result[@intFromEnum(Feature.shader)] = .{
+        .llvm_name = null,
+        .description = "Enable Shader capability",
+        .dependencies = featureSet(&[_]Feature{
+            .matrix,
+        }),
+    };
+    result[@intFromEnum(Feature.storage_push_constant16)] = .{
+        .llvm_name = null,
+        .description = "Enable SPV_KHR_16bit_storage extension and the StoragePushConstant16 capability",
+        .dependencies = featureSet(&[_]Feature{
+            .v1_3,
+        }),
+    };
     result[@intFromEnum(Feature.v1_0)] = .{
         .llvm_name = null,
         .description = "Enable version 1.0",
@@ -43,97 +122,58 @@
     result[@intFromEnum(Feature.v1_1)] = .{
         .llvm_name = null,
         .description = "Enable version 1.1",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
+        .dependencies = featureSet(&[_]Feature{
+            .v1_0,
+        }),
     };
     result[@intFromEnum(Feature.v1_2)] = .{
         .llvm_name = null,
         .description = "Enable version 1.2",
-        .dependencies = featureSet(&[_]Feature{.v1_1}),
+        .dependencies = featureSet(&[_]Feature{
+            .v1_1,
+        }),
     };
     result[@intFromEnum(Feature.v1_3)] = .{
         .llvm_name = null,
         .description = "Enable version 1.3",
-        .dependencies = featureSet(&[_]Feature{.v1_2}),
+        .dependencies = featureSet(&[_]Feature{
+            .v1_2,
+        }),
    };
     result[@intFromEnum(Feature.v1_4)] = .{
         .llvm_name = null,
         .description = "Enable version 1.4",
-        .dependencies = featureSet(&[_]Feature{.v1_3}),
+        .dependencies = featureSet(&[_]Feature{
+            .v1_3,
+        }),
     };
     result[@intFromEnum(Feature.v1_5)] = .{
         .llvm_name = null,
         .description = "Enable version 1.5",
-        .dependencies = featureSet(&[_]Feature{.v1_4}),
+        .dependencies = featureSet(&[_]Feature{
+            .v1_4,
+        }),
     };
     result[@intFromEnum(Feature.v1_6)] = .{
         .llvm_name = null,
         .description = "Enable version 1.6",
-        .dependencies = featureSet(&[_]Feature{.v1_5}),
+        .dependencies = featureSet(&[_]Feature{
+            .v1_5,
+        }),
     };
-    result[@intFromEnum(Feature.int64)] = .{
+    result[@intFromEnum(Feature.variable_pointers)] = .{
         .llvm_name = null,
-        .description = "Enable Int64 capability",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
-    };
-    result[@intFromEnum(Feature.float16)] = .{
-        .llvm_name = null,
-        .description = "Enable Float16 capability",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
-    };
-    result[@intFromEnum(Feature.float64)] = .{
-        .llvm_name = null,
-        .description = "Enable Float64 capability",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
-    };
-    result[@intFromEnum(Feature.matrix)] = .{
-        .llvm_name = null,
-        .description = "Enable Matrix capability",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
-    };
-    result[@intFromEnum(Feature.storage_push_constant16)] = .{
-        .llvm_name = null,
-        .description = "Enable SPV_KHR_16bit_storage extension and the StoragePushConstant16 capability",
-        .dependencies = featureSet(&[_]Feature{.v1_3}),
-    };
-    result[@intFromEnum(Feature.arbitrary_precision_integers)] = .{
-        .llvm_name = null,
-        .description = "Enable SPV_INTEL_arbitrary_precision_integers extension and the ArbitraryPrecisionIntegersINTEL capability",
-        .dependencies = featureSet(&[_]Feature{.v1_5}),
-    };
-    result[@intFromEnum(Feature.kernel)] = .{
-        .llvm_name = null,
-        .description = "Enable Kernel capability",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
-    };
-    result[@intFromEnum(Feature.addresses)] = .{
-        .llvm_name = null,
-        .description = "Enable Addresses capability",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
-    };
-    result[@intFromEnum(Feature.generic_pointer)] = .{
-        .llvm_name = null,
-        .description = "Enable GenericPointer capability",
-        .dependencies = featureSet(&[_]Feature{ .v1_0, .addresses }),
+        .description = "Enable SPV_KHR_variable_pointers extension and the (VariablePointers, VariablePointersStorageBuffer) capabilities",
+        .dependencies = featureSet(&[_]Feature{
+            .v1_0,
+        }),
     };
     result[@intFromEnum(Feature.vector16)] = .{
         .llvm_name = null,
         .description = "Enable Vector16 capability",
-        .dependencies = featureSet(&[_]Feature{ .v1_0, .kernel }),
-    };
-    result[@intFromEnum(Feature.shader)] = .{
-        .llvm_name = null,
-        .description = "Enable Shader capability",
-        .dependencies = featureSet(&[_]Feature{ .v1_0, .matrix }),
-    };
-    result[@intFromEnum(Feature.physical_storage_buffer)] = .{
-        .llvm_name = null,
-        .description = "Enable SPV_KHR_physical_storage_buffer extension and the PhysicalStorageBufferAddresses capability",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
-    };
-    result[@intFromEnum(Feature.variable_pointers)] = .{
-        .llvm_name = null,
-        .description = "Enable SPV_KHR_variable_pointers extension and the (VariablePointers, VariablePointersStorageBuffer) capabilities",
-        .dependencies = featureSet(&[_]Feature{.v1_0}),
+        .dependencies = featureSet(&[_]Feature{
+            .kernel,
+        }),
     };
     const ti = @typeInfo(Feature);
     for (&result, 0..) |*elem, i| {
@@ -147,18 +187,23 @@ pub const cpu = struct {
     pub const generic: CpuModel = .{
         .name = "generic",
         .llvm_name = "generic",
-        .features = featureSet(&[_]Feature{.v1_0}),
+        .features = featureSet(&[_]Feature{}),
     };
-
-    pub const vulkan_v1_2: CpuModel = .{
-        .name = "vulkan_v1_2",
-        .llvm_name = null,
-        .features = featureSet(&[_]Feature{ .v1_5, .shader }),
-    };
-
     pub const opencl_v2: CpuModel = .{
         .name = "opencl_v2",
         .llvm_name = null,
-        .features = featureSet(&[_]Feature{ .v1_2, .kernel, .addresses, .generic_pointer }),
+        .features = featureSet(&[_]Feature{
+            .generic_pointer,
+            .kernel,
+            .v1_2,
+        }),
+    };
+    pub const vulkan_v1_2: CpuModel = .{
+        .name = "vulkan_v1_2",
+        .llvm_name = null,
+        .features = featureSet(&[_]Feature{
+            .shader,
+            .v1_5,
+        }),
     };
 };
diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig
index 9ac7b7ef2a..f033195aac 100644
--- a/tools/update_cpu_features.zig
+++ b/tools/update_cpu_features.zig
@@ -1047,6 +1047,128 @@ const targets = [_]ArchTarget{
         },
     },
+    .{
+        .zig_name = "spirv",
+        .llvm = .{
+            .name = "SPIRV",
+            .td_name = "SPIRV",
+        },
+        .branch_quota = 2000,
+        .extra_features = &.{
+            .{
+                .zig_name = "v1_0",
+                .desc = "Enable version 1.0",
+                .deps = &.{},
+            },
+            .{
+                .zig_name = "v1_1",
+                .desc = "Enable version 1.1",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "v1_2",
+                .desc = "Enable version 1.2",
+                .deps = &.{"v1_1"},
+            },
+            .{
+                .zig_name = "v1_3",
+                .desc = "Enable version 1.3",
+                .deps = &.{"v1_2"},
+            },
+            .{
+                .zig_name = "v1_4",
+                .desc = "Enable version 1.4",
+                .deps = &.{"v1_3"},
+            },
+            .{
+                .zig_name = "v1_5",
+                .desc = "Enable version 1.5",
+                .deps = &.{"v1_4"},
+            },
+            .{
+                .zig_name = "v1_6",
+                .desc = "Enable version 1.6",
+                .deps = &.{"v1_5"},
+            },
+            .{
+                .zig_name = "int64",
+                .desc = "Enable Int64 capability",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "float16",
+                .desc = "Enable Float16 capability",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "float64",
+                .desc = "Enable Float64 capability",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "matrix",
+                .desc = "Enable Matrix capability",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "storage_push_constant16",
+                .desc = "Enable SPV_KHR_16bit_storage extension and the StoragePushConstant16 capability",
+                .deps = &.{"v1_3"},
+            },
+            .{
+                .zig_name = "arbitrary_precision_integers",
+                .desc = "Enable SPV_INTEL_arbitrary_precision_integers extension and the ArbitraryPrecisionIntegersINTEL capability",
+                .deps = &.{"v1_5"},
+            },
+            .{
+                .zig_name = "kernel",
+                .desc = "Enable Kernel capability",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "addresses",
+                .desc = "Enable Addresses capability",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "generic_pointer",
+                .desc = "Enable GenericPointer capability",
+                .deps = &.{ "v1_0", "addresses" },
+            },
+            .{
+                .zig_name = "vector16",
+                .desc = "Enable Vector16 capability",
+                .deps = &.{ "v1_0", "kernel" },
+            },
+            .{
+                .zig_name = "shader",
+                .desc = "Enable Shader capability",
+                .deps = &.{ "v1_0", "matrix" },
+            },
+            .{
+                .zig_name = "variable_pointers",
+                .desc = "Enable SPV_KHR_variable_pointers extension and the (VariablePointers, VariablePointersStorageBuffer) capabilities",
+                .deps = &.{"v1_0"},
+            },
+            .{
+                .zig_name = "physical_storage_buffer",
+                .desc = "Enable SPV_KHR_physical_storage_buffer extension and the PhysicalStorageBufferAddresses capability",
+                .deps = &.{"v1_0"},
+            },
+        },
+        .extra_cpus = &.{
+            .{
+                .llvm_name = null,
+                .zig_name = "vulkan_v1_2",
+                .features = &.{ "v1_5", "shader" },
+            },
+            .{
+
.llvm_name = null, + .zig_name = "opencl_v2", + .features = &.{ "v1_2", "kernel", "addresses", "generic_pointer" }, + }, + }, + }, .{ .zig_name = "riscv", .llvm = .{