From f661ab6c36c0366331451986a29ee14ac05fd8eb Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 6 Sep 2025 10:23:41 +0100 Subject: [PATCH] std.Io.Reader: fix delimiter bugs Fix `takeDelimiter` and `takeDelimiterExclusive` tossing too many bytes (#25132) Also add/improve test coverage for all delimiter and sentinel methods, update usages of `takeDelimiterExclusive` to not rely on the fixed bug, tweak a handful of doc comments, and slightly simplify some logic. I have not fixed #24950 in this commit because I am a little less certain about the appropriate solution there. Resolves: #25132 Co-authored-by: Andrew Kelley --- lib/std/Io/Reader.zig | 75 +++++++++++++++++++++++++++++++----- lib/std/net.zig | 5 ++- lib/std/zig/system/linux.zig | 5 +-- 3 files changed, 70 insertions(+), 15 deletions(-) diff --git a/lib/std/Io/Reader.zig b/lib/std/Io/Reader.zig index b62e81cd13..68957a8902 100644 --- a/lib/std/Io/Reader.zig +++ b/lib/std/Io/Reader.zig @@ -449,7 +449,6 @@ pub fn readVecAll(r: *Reader, data: [][]u8) Error!void { /// is returned instead. /// /// See also: -/// * `peek` /// * `toss` pub fn peek(r: *Reader, n: usize) Error![]u8 { try r.fill(n); @@ -700,7 +699,7 @@ pub const DelimiterError = error{ }; /// Returns a slice of the next bytes of buffered data from the stream until -/// `sentinel` is found, advancing the seek position. +/// `sentinel` is found, advancing the seek position past the sentinel. /// /// Returned slice has a sentinel. /// @@ -733,7 +732,7 @@ pub fn peekSentinel(r: *Reader, comptime sentinel: u8) DelimiterError![:sentinel } /// Returns a slice of the next bytes of buffered data from the stream until -/// `delimiter` is found, advancing the seek position. +/// `delimiter` is found, advancing the seek position past the delimiter. /// /// Returned slice includes the delimiter as the last byte. /// @@ -786,7 +785,8 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 { } /// Returns a slice of the next bytes of buffered data from the stream until -/// `delimiter` is found, advancing the seek position. +/// `delimiter` is found, advancing the seek position up to (but not past) +/// the delimiter. /// /// Returned slice excludes the delimiter. End-of-stream is treated equivalent /// to a delimiter, unless it would result in a length 0 return value, in which @@ -800,20 +800,44 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 { /// Invalidates previously returned values from `peek`. /// /// See also: +/// * `takeDelimiter` /// * `takeDelimiterInclusive` /// * `peekDelimiterExclusive` pub fn takeDelimiterExclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 { - const result = r.peekDelimiterInclusive(delimiter) catch |err| switch (err) { + const result = try r.peekDelimiterExclusive(delimiter); + r.toss(result.len); + return result; +} + +/// Returns a slice of the next bytes of buffered data from the stream until +/// `delimiter` is found, advancing the seek position past the delimiter. +/// +/// Returned slice excludes the delimiter. End-of-stream is treated equivalent +/// to a delimiter, unless it would result in a length 0 return value, in which +/// case `null` is returned instead. +/// +/// If the delimiter is not found within a number of bytes matching the +/// capacity of this `Reader`, `error.StreamTooLong` is returned. In +/// such case, the stream state is unmodified as if this function was never +/// called. +/// +/// Invalidates previously returned values from `peek`. +/// +/// See also: +/// * `takeDelimiterInclusive` +/// * `takeDelimiterExclusive` +pub fn takeDelimiter(r: *Reader, delimiter: u8) error{ ReadFailed, StreamTooLong }!?[]u8 { + const inclusive = r.peekDelimiterInclusive(delimiter) catch |err| switch (err) { error.EndOfStream => { const remaining = r.buffer[r.seek..r.end]; - if (remaining.len == 0) return error.EndOfStream; + if (remaining.len == 0) return null; r.toss(remaining.len); return remaining; }, else => |e| return e, }; - r.toss(result.len); - return result[0 .. result.len - 1]; + r.toss(inclusive.len); + return inclusive[0 .. inclusive.len - 1]; } /// Returns a slice of the next bytes of buffered data from the stream until @@ -1336,6 +1360,9 @@ test peekSentinel { var r: Reader = .fixed("ab\nc"); try testing.expectEqualStrings("ab", try r.peekSentinel('\n')); try testing.expectEqualStrings("ab", try r.peekSentinel('\n')); + r.toss(3); + try testing.expectError(error.EndOfStream, r.peekSentinel('\n')); + try testing.expectEqualStrings("c", try r.peek(1)); } test takeDelimiterInclusive { @@ -1350,22 +1377,52 @@ test peekDelimiterInclusive { try testing.expectEqualStrings("ab\n", try r.peekDelimiterInclusive('\n')); r.toss(3); try testing.expectError(error.EndOfStream, r.peekDelimiterInclusive('\n')); + try testing.expectEqualStrings("c", try r.peek(1)); } test takeDelimiterExclusive { var r: Reader = .fixed("ab\nc"); + try testing.expectEqualStrings("ab", try r.takeDelimiterExclusive('\n')); + try testing.expectEqualStrings("", try r.takeDelimiterExclusive('\n')); + try testing.expectEqualStrings("", try r.takeDelimiterExclusive('\n')); + try testing.expectEqualStrings("\n", try r.take(1)); + try testing.expectEqualStrings("c", try r.takeDelimiterExclusive('\n')); try testing.expectError(error.EndOfStream, r.takeDelimiterExclusive('\n')); } test peekDelimiterExclusive { var r: Reader = .fixed("ab\nc"); + try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n')); try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n')); - r.toss(3); + r.toss(2); + try testing.expectEqualStrings("", try r.peekDelimiterExclusive('\n')); + try testing.expectEqualStrings("\n", try r.take(1)); + try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n')); try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n')); + r.toss(1); + try testing.expectError(error.EndOfStream, r.peekDelimiterExclusive('\n')); +} + +test takeDelimiter { + var r: Reader = .fixed("ab\nc\n\nd"); + try testing.expectEqualStrings("ab", (try r.takeDelimiter('\n')).?); + try testing.expectEqualStrings("c", (try r.takeDelimiter('\n')).?); + try testing.expectEqualStrings("", (try r.takeDelimiter('\n')).?); + try testing.expectEqualStrings("d", (try r.takeDelimiter('\n')).?); + try testing.expectEqual(null, try r.takeDelimiter('\n')); + try testing.expectEqual(null, try r.takeDelimiter('\n')); + + r = .fixed("ab\nc\n\nd\n"); // one trailing newline does not affect behavior + try testing.expectEqualStrings("ab", (try r.takeDelimiter('\n')).?); + try testing.expectEqualStrings("c", (try r.takeDelimiter('\n')).?); + try testing.expectEqualStrings("", (try r.takeDelimiter('\n')).?); + try testing.expectEqualStrings("d", (try r.takeDelimiter('\n')).?); + try testing.expectEqual(null, try r.takeDelimiter('\n')); + try testing.expectEqual(null, try r.takeDelimiter('\n')); } test streamDelimiter { diff --git a/lib/std/net.zig b/lib/std/net.zig index 083bda85d5..03383b0f89 100644 --- a/lib/std/net.zig +++ b/lib/std/net.zig @@ -1393,7 +1393,7 @@ fn parseHosts( br: *Io.Reader, ) error{ OutOfMemory, ReadFailed }!void { while (true) { - const line = br.takeDelimiterExclusive('\n') catch |err| switch (err) { + const line = br.takeDelimiter('\n') catch |err| switch (err) { error.StreamTooLong => { // Skip lines that are too long. _ = br.discardDelimiterInclusive('\n') catch |e| switch (e) { @@ -1403,7 +1403,8 @@ fn parseHosts( continue; }, error.ReadFailed => return error.ReadFailed, - error.EndOfStream => break, + } orelse { + break; // end of stream }; var split_it = mem.splitScalar(u8, line, '#'); const no_comment_line = split_it.first(); diff --git a/lib/std/zig/system/linux.zig b/lib/std/zig/system/linux.zig index df70e71f5b..c0fe9a9ceb 100644 --- a/lib/std/zig/system/linux.zig +++ b/lib/std/zig/system/linux.zig @@ -358,14 +358,11 @@ fn CpuinfoParser(comptime impl: anytype) type { return struct { fn parse(arch: Target.Cpu.Arch, reader: *std.Io.Reader) !?Target.Cpu { var obj: impl = .{}; - while (reader.takeDelimiterExclusive('\n')) |line| { + while (try reader.takeDelimiter('\n')) |line| { const colon_pos = mem.indexOfScalar(u8, line, ':') orelse continue; const key = mem.trimEnd(u8, line[0..colon_pos], " \t"); const value = mem.trimStart(u8, line[colon_pos + 1 ..], " \t"); if (!try obj.line_hook(key, value)) break; - } else |err| switch (err) { - error.EndOfStream => {}, - else => |e| return e, } return obj.finalize(arch); }