std.Io.Reader: fix delimiter bugs

Fix `takeDelimiter` and `takeDelimiterExclusive` tossing too many bytes (#25132) Also add/improve test coverage for all delimiter and sentinel methods, update usages of `takeDelimiterExclusive` to not rely on the fixed bug, tweak a handful of doc comments, and slightly simplify some logic. I have not fixed #24950 in this commit because I am a little less certain about the appropriate solution there. Resolves: #25132 Co-authored-by: Andrew Kelley <andrew@ziglang.org>
2025-12-06 06:13:07 +00:00 · 2025-09-06 10:23:41 +01:00 · 2025-09-06 10:23:41 +01:00 · f661ab6c36
commit f661ab6c36
parent 9ea4d9aa3b
3 changed files with 70 additions and 15 deletions
--- a/lib/std/Io/Reader.zig
+++ b/lib/std/Io/Reader.zig
@ -449,7 +449,6 @@ pub fn readVecAll(r: *Reader, data: [][]u8) Error!void {
 /// is returned instead.
 ///
 /// See also:
 /// * `peek`
 /// * `toss`
 pub fn peek(r: *Reader, n: usize) Error![]u8 {
    try r.fill(n);
@ -700,7 +699,7 @@ pub const DelimiterError = error{
 };
 /// Returns a slice of the next bytes of buffered data from the stream until
-/// `sentinel` is found, advancing the seek position.
+/// `sentinel` is found, advancing the seek position past the sentinel.
 ///
 /// Returned slice has a sentinel.
 ///
@ -733,7 +732,7 @@ pub fn peekSentinel(r: *Reader, comptime sentinel: u8) DelimiterError![:sentinel
 }
 /// Returns a slice of the next bytes of buffered data from the stream until
-/// `delimiter` is found, advancing the seek position.
+/// `delimiter` is found, advancing the seek position past the delimiter.
 ///
 /// Returned slice includes the delimiter as the last byte.
 ///
@ -786,7 +785,8 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
 }
 /// Returns a slice of the next bytes of buffered data from the stream until
-/// `delimiter` is found, advancing the seek position.
+/// `delimiter` is found, advancing the seek position up to (but not past)
 /// the delimiter.
 ///
 /// Returned slice excludes the delimiter. End-of-stream is treated equivalent
 /// to a delimiter, unless it would result in a length 0 return value, in which
@ -800,20 +800,44 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
 /// Invalidates previously returned values from `peek`.
 ///
 /// See also:
 /// * `takeDelimiter`
 /// * `takeDelimiterInclusive`
 /// * `peekDelimiterExclusive`
 pub fn takeDelimiterExclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
-    const result = r.peekDelimiterInclusive(delimiter) catch |err| switch (err) {
+    const result = try r.peekDelimiterExclusive(delimiter);
    r.toss(result.len);
    return result;
 }
 /// Returns a slice of the next bytes of buffered data from the stream until
 /// `delimiter` is found, advancing the seek position past the delimiter.
 ///
 /// Returned slice excludes the delimiter. End-of-stream is treated equivalent
 /// to a delimiter, unless it would result in a length 0 return value, in which
 /// case `null` is returned instead.
 ///
 /// If the delimiter is not found within a number of bytes matching the
 /// capacity of this `Reader`, `error.StreamTooLong` is returned. In
 /// such case, the stream state is unmodified as if this function was never
 /// called.
 ///
 /// Invalidates previously returned values from `peek`.
 ///
 /// See also:
 /// * `takeDelimiterInclusive`
 /// * `takeDelimiterExclusive`
 pub fn takeDelimiter(r: *Reader, delimiter: u8) error{ ReadFailed, StreamTooLong }!?[]u8 {
    const inclusive = r.peekDelimiterInclusive(delimiter) catch |err| switch (err) {
        error.EndOfStream => {
            const remaining = r.buffer[r.seek..r.end];
-            if (remaining.len == 0) return error.EndOfStream;
+            if (remaining.len == 0) return null;
            r.toss(remaining.len);
            return remaining;
        },
        else => |e| return e,
    };
-    r.toss(result.len);
+    r.toss(inclusive.len);
-    return result[0 .. result.len - 1];
+    return inclusive[0 .. inclusive.len - 1];
 }
 /// Returns a slice of the next bytes of buffered data from the stream until
@ -1336,6 +1360,9 @@ test peekSentinel {
    var r: Reader = .fixed("ab\nc");
    try testing.expectEqualStrings("ab", try r.peekSentinel('\n'));
    try testing.expectEqualStrings("ab", try r.peekSentinel('\n'));
    r.toss(3);
    try testing.expectError(error.EndOfStream, r.peekSentinel('\n'));
    try testing.expectEqualStrings("c", try r.peek(1));
 }
 test takeDelimiterInclusive {
@ -1350,22 +1377,52 @@ test peekDelimiterInclusive {
    try testing.expectEqualStrings("ab\n", try r.peekDelimiterInclusive('\n'));
    r.toss(3);
    try testing.expectError(error.EndOfStream, r.peekDelimiterInclusive('\n'));
    try testing.expectEqualStrings("c", try r.peek(1));
 }
 test takeDelimiterExclusive {
    var r: Reader = .fixed("ab\nc");
    try testing.expectEqualStrings("ab", try r.takeDelimiterExclusive('\n'));
    try testing.expectEqualStrings("", try r.takeDelimiterExclusive('\n'));
    try testing.expectEqualStrings("", try r.takeDelimiterExclusive('\n'));
    try testing.expectEqualStrings("\n", try r.take(1));
    try testing.expectEqualStrings("c", try r.takeDelimiterExclusive('\n'));
    try testing.expectError(error.EndOfStream, r.takeDelimiterExclusive('\n'));
 }
 test peekDelimiterExclusive {
    var r: Reader = .fixed("ab\nc");
    try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n'));
    try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n'));
-    r.toss(3);
+    r.toss(2);
    try testing.expectEqualStrings("", try r.peekDelimiterExclusive('\n'));
    try testing.expectEqualStrings("\n", try r.take(1));
    try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n'));
    try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n'));
    r.toss(1);
    try testing.expectError(error.EndOfStream, r.peekDelimiterExclusive('\n'));
 }
 test takeDelimiter {
    var r: Reader = .fixed("ab\nc\n\nd");
    try testing.expectEqualStrings("ab", (try r.takeDelimiter('\n')).?);
    try testing.expectEqualStrings("c", (try r.takeDelimiter('\n')).?);
    try testing.expectEqualStrings("", (try r.takeDelimiter('\n')).?);
    try testing.expectEqualStrings("d", (try r.takeDelimiter('\n')).?);
    try testing.expectEqual(null, try r.takeDelimiter('\n'));
    try testing.expectEqual(null, try r.takeDelimiter('\n'));
    r = .fixed("ab\nc\n\nd\n"); // one trailing newline does not affect behavior
    try testing.expectEqualStrings("ab", (try r.takeDelimiter('\n')).?);
    try testing.expectEqualStrings("c", (try r.takeDelimiter('\n')).?);
    try testing.expectEqualStrings("", (try r.takeDelimiter('\n')).?);
    try testing.expectEqualStrings("d", (try r.takeDelimiter('\n')).?);
    try testing.expectEqual(null, try r.takeDelimiter('\n'));
    try testing.expectEqual(null, try r.takeDelimiter('\n'));
 }
 test streamDelimiter {
--- a/lib/std/net.zig
+++ b/lib/std/net.zig
@ -1393,7 +1393,7 @@ fn parseHosts(
    br: *Io.Reader,
 ) error{ OutOfMemory, ReadFailed }!void {
    while (true) {
-        const line = br.takeDelimiterExclusive('\n') catch |err| switch (err) {
+        const line = br.takeDelimiter('\n') catch |err| switch (err) {
            error.StreamTooLong => {
                // Skip lines that are too long.
                _ = br.discardDelimiterInclusive('\n') catch |e| switch (e) {
@ -1403,7 +1403,8 @@ fn parseHosts(
                continue;
            },
            error.ReadFailed => return error.ReadFailed,
-            error.EndOfStream => break,
+        } orelse {
            break; // end of stream
        };
        var split_it = mem.splitScalar(u8, line, '#');
        const no_comment_line = split_it.first();
--- a/lib/std/zig/system/linux.zig
+++ b/lib/std/zig/system/linux.zig
@ -358,14 +358,11 @@ fn CpuinfoParser(comptime impl: anytype) type {
    return struct {
        fn parse(arch: Target.Cpu.Arch, reader: *std.Io.Reader) !?Target.Cpu {
            var obj: impl = .{};
-            while (reader.takeDelimiterExclusive('\n')) |line| {
+            while (try reader.takeDelimiter('\n')) |line| {
                const colon_pos = mem.indexOfScalar(u8, line, ':') orelse continue;
                const key = mem.trimEnd(u8, line[0..colon_pos], " \t");
                const value = mem.trimStart(u8, line[colon_pos + 1 ..], " \t");
                if (!try obj.line_hook(key, value)) break;
            } else |err| switch (err) {
                error.EndOfStream => {},
                else => |e| return e,
            }
            return obj.finalize(arch);
        }