Zir: split up start and end of range in for_len

The old lowering was kind of neat, but it unintentionally allowed the syntax `for (123) |_| { ... }`, and there wasn't really a way to fix that. So, instead, we include both the start and the end of the range in the `for_len` instruction (each operand to `for` now has *two* entries in this multi-op instruction). This slightly increases the size of ZIR for loops of predominantly indexables, but the difference is small enough that it's not worth complicating ZIR to try and fix it.
2026-02-21 16:54:52 +00:00 · 2025-01-03 07:54:53 +00:00 · 2025-01-03 07:54:53 +00:00 · b4da8eef2a
commit b4da8eef2a
parent 252c203101
4 changed files with 42 additions and 42 deletions
--- a/lib/std/zig/AstGen.zig
+++ b/lib/std/zig/AstGen.zig
@ -7020,7 +7020,7 @@ fn forExpr(
    const indexables = try gpa.alloc(Zir.Inst.Ref, for_full.ast.inputs.len);
    defer gpa.free(indexables);
    // elements of this array can be `none`, indicating no length check.
-    const lens = try gpa.alloc(Zir.Inst.Ref, for_full.ast.inputs.len);
+    const lens = try gpa.alloc([2]Zir.Inst.Ref, for_full.ast.inputs.len);
    defer gpa.free(lens);

    // We will use a single zero-based counter no matter how many indexables there are.
@ -7039,7 +7039,7 @@ fn forExpr(

    {
        var capture_token = for_full.payload_token;
-        for (for_full.ast.inputs, indexables, lens) |input, *indexable_ref, *len_ref| {
+        for (for_full.ast.inputs, indexables, lens) |input, *indexable_ref, *len_refs| {
            const capture_is_ref = token_tags[capture_token] == .asterisk;
            const ident_tok = capture_token + @intFromBool(capture_is_ref);
            const is_discard = mem.eql(u8, tree.tokenSlice(ident_tok), "_");
@ -7068,24 +7068,21 @@ fn forExpr(
                    try astgen.appendErrorTok(ident_tok, "discard of unbounded counter", .{});
                }

-                const start_is_zero = nodeIsTriviallyZero(tree, start_node);
-                const range_len = if (end_val == .none or start_is_zero)
-                    end_val
-                else
-                    try parent_gz.addPlNode(.sub, input, Zir.Inst.Bin{
-                        .lhs = end_val,
-                        .rhs = start_val,
-                    });
+                if (end_val == .none) {
+                    len_refs.* = .{ .none, .none };
+                } else {
+                    any_len_checks = true;
+                    len_refs.* = .{ start_val, end_val };
+                }

-                any_len_checks = any_len_checks or range_len != .none;
+                const start_is_zero = nodeIsTriviallyZero(tree, start_node);
                indexable_ref.* = if (start_is_zero) .none else start_val;
-                len_ref.* = range_len;
            } else {
                const indexable = try expr(parent_gz, scope, .{ .rl = .none }, input);

                any_len_checks = true;
                indexable_ref.* = indexable;
-                len_ref.* = indexable;
+                len_refs.* = .{ indexable, .none };
            }
        }
    }
@ -7097,12 +7094,13 @@ fn forExpr(
    // We use a dedicated ZIR instruction to assert the lengths to assist with
    // nicer error reporting as well as fewer ZIR bytes emitted.
    const len: Zir.Inst.Ref = len: {
-        const lens_len: u32 = @intCast(lens.len);
+        const all_lens = @as([*]Zir.Inst.Ref, @ptrCast(lens))[0 .. lens.len * 2];
+        const lens_len: u32 = @intCast(all_lens.len);
        try astgen.extra.ensureUnusedCapacity(gpa, @typeInfo(Zir.Inst.MultiOp).@"struct".fields.len + lens_len);
        const len = try parent_gz.addPlNode(.for_len, node, Zir.Inst.MultiOp{
            .operands_len = lens_len,
        });
-        appendRefsAssumeCapacity(astgen, lens);
+        appendRefsAssumeCapacity(astgen, all_lens);
        break :len len;
    };

--- a/lib/std/zig/Zir.zig
+++ b/lib/std/zig/Zir.zig
@ -526,8 +526,10 @@ pub const Inst = struct {
        /// Asserts that all the lengths provided match. Used to build a for loop.
        /// Return value is the length as a usize.
        /// Uses the `pl_node` field with payload `MultiOp`.
-        /// There is exactly one item corresponding to each AST node inside the for
-        /// loop condition. Any item may be `none`, indicating an unbounded range.
+        /// There are two items for each AST node inside the for loop condition.
+        /// If both items in a pair are `.none`, then this node is an unbounded range.
+        /// If only the second item in a pair is `.none`, then the first is an indexable.
+        /// Otherwise, the node is a bounded range `a..b`, with the items being `a` and `b`.
        /// Illegal behaviors:
        ///  * If all lengths are unbounded ranges (always a compile error).
        ///  * If any two lengths do not match each other.
--- a/src/Sema.zig
+++ b/src/Sema.zig
@ -4356,7 +4356,8 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
    const ip = &zcu.intern_pool;
    const inst_data = sema.code.instructions.items(.data)[@intFromEnum(inst)].pl_node;
    const extra = sema.code.extraData(Zir.Inst.MultiOp, inst_data.payload_index);
-    const args = sema.code.refSlice(extra.end, extra.data.operands_len);
+    const all_args = sema.code.refSlice(extra.end, extra.data.operands_len);
+    const arg_pairs: []const [2]Zir.Inst.Ref = @as([*]const [2]Zir.Inst.Ref, @ptrCast(all_args))[0..@divExact(all_args.len, 2)];
    const src = block.nodeOffset(inst_data.src_node);

    var len: Air.Inst.Ref = .none;
@ -4364,27 +4365,24 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
    var len_idx: u32 = undefined;
    var any_runtime = false;

-    const runtime_arg_lens = try gpa.alloc(Air.Inst.Ref, args.len);
+    const runtime_arg_lens = try gpa.alloc(Air.Inst.Ref, arg_pairs.len);
    defer gpa.free(runtime_arg_lens);

    // First pass to look for comptime values.
-    for (args, 0..) |zir_arg, i_usize| {
+    for (arg_pairs, 0..) |zir_arg_pair, i_usize| {
        const i: u32 = @intCast(i_usize);
        runtime_arg_lens[i] = .none;
-        if (zir_arg == .none) continue;
-        const object = try sema.resolveInst(zir_arg);
-        const object_ty = sema.typeOf(object);
-        // Each arg could be an indexable, or a range, in which case the length
-        // is passed directly as an integer.
-        const is_int = switch (object_ty.zigTypeTag(zcu)) {
-            .int, .comptime_int => true,
-            else => false,
-        };
+        if (zir_arg_pair[0] == .none) continue;
+
        const arg_src = block.src(.{ .for_input = .{
            .for_node_offset = inst_data.src_node,
            .input_index = i,
        } });
-        const arg_len_uncoerced = if (is_int) object else l: {
+
+        const arg_len_uncoerced = if (zir_arg_pair[1] == .none) l: {
+            // This argument is an indexable.
+            const object = try sema.resolveInst(zir_arg_pair[0]);
+            const object_ty = sema.typeOf(object);
            if (!object_ty.isIndexable(zcu)) {
                // Instead of using checkIndexable we customize this error.
                const msg = msg: {
@ -4401,8 +4399,12 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
                return sema.failWithOwnedErrorMsg(block, msg);
            }
            if (!object_ty.indexableHasLen(zcu)) continue;
-
            break :l try sema.fieldVal(block, arg_src, object, try ip.getOrPutString(gpa, pt.tid, "len", .no_embedded_nulls), arg_src);
+        } else l: {
+            // This argument is a range.
+            const range_start = try sema.resolveInst(zir_arg_pair[0]);
+            const range_end = try sema.resolveInst(zir_arg_pair[1]);
+            break :l try sema.analyzeArithmetic(block, .sub, range_end, range_start, arg_src, arg_src, arg_src, true);
        };
        const arg_len = try sema.coerce(block, Type.usize, arg_len_uncoerced, arg_src);
        if (len == .none) {
@ -4444,17 +4446,12 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
        const msg = msg: {
            const msg = try sema.errMsg(src, "unbounded for loop", .{});
            errdefer msg.destroy(gpa);
-            for (args, 0..) |zir_arg, i_usize| {
+            for (arg_pairs, 0..) |zir_arg_pair, i_usize| {
                const i: u32 = @intCast(i_usize);
-                if (zir_arg == .none) continue;
-                const object = try sema.resolveInst(zir_arg);
+                if (zir_arg_pair[0] == .none) continue;
+                if (zir_arg_pair[1] != .none) continue;
+                const object = try sema.resolveInst(zir_arg_pair[0]);
                const object_ty = sema.typeOf(object);
-                // Each arg could be an indexable, or a range, in which case the length
-                // is passed directly as an integer.
-                switch (object_ty.zigTypeTag(zcu)) {
-                    .int, .comptime_int => continue,
-                    else => {},
-                }
                const arg_src = block.src(.{ .for_input = .{
                    .for_node_offset = inst_data.src_node,
                    .input_index = i,
--- a/test/cases/compile_errors/for.zig
+++ b/test/cases/compile_errors/for.zig
@ -28,10 +28,11 @@ export fn d() void {
        _ = x3;
    }
 }
+export fn e() void {
+    for (123) |_| {}
+}

 // error
-// backend=stage2
-// target=native
 //
 // :2:5: error: non-matching for loop lengths
 // :2:11: note: length 10 here
@ -43,3 +44,5 @@ export fn d() void {
 // :25:5: error: unbounded for loop
 // :25:10: note: type '[*]const u8' has no upper bound
 // :25:18: note: type '[*]const u8' has no upper bound
+// :32:10: error: type 'comptime_int' is not indexable and not a range
+// :32:10: note: for loop operand must be a range, array, slice, tuple, or vector