std.Io.Reader: fix delimiter bugs

Fix `takeDelimiter` and `takeDelimiterExclusive` tossing too many bytes
(#25132)

Also add/improve test coverage for all delimiter and sentinel methods,
update usages of `takeDelimiterExclusive` to not rely on the fixed bug,
tweak a handful of doc comments, and slightly simplify some logic.

I have not fixed #24950 in this commit because I am a little less
certain about the appropriate solution there.

Resolves: #25132

Co-authored-by: Andrew Kelley <andrew@ziglang.org>
This commit is contained in:
mlugg 2025-09-06 10:23:41 +01:00 committed by Andrew Kelley
parent 9ea4d9aa3b
commit f661ab6c36
3 changed files with 70 additions and 15 deletions

View File

@ -449,7 +449,6 @@ pub fn readVecAll(r: *Reader, data: [][]u8) Error!void {
/// is returned instead. /// is returned instead.
/// ///
/// See also: /// See also:
/// * `peek`
/// * `toss` /// * `toss`
pub fn peek(r: *Reader, n: usize) Error![]u8 { pub fn peek(r: *Reader, n: usize) Error![]u8 {
try r.fill(n); try r.fill(n);
@ -700,7 +699,7 @@ pub const DelimiterError = error{
}; };
/// Returns a slice of the next bytes of buffered data from the stream until /// Returns a slice of the next bytes of buffered data from the stream until
/// `sentinel` is found, advancing the seek position. /// `sentinel` is found, advancing the seek position past the sentinel.
/// ///
/// Returned slice has a sentinel. /// Returned slice has a sentinel.
/// ///
@ -733,7 +732,7 @@ pub fn peekSentinel(r: *Reader, comptime sentinel: u8) DelimiterError![:sentinel
} }
/// Returns a slice of the next bytes of buffered data from the stream until /// Returns a slice of the next bytes of buffered data from the stream until
/// `delimiter` is found, advancing the seek position. /// `delimiter` is found, advancing the seek position past the delimiter.
/// ///
/// Returned slice includes the delimiter as the last byte. /// Returned slice includes the delimiter as the last byte.
/// ///
@ -786,7 +785,8 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
} }
/// Returns a slice of the next bytes of buffered data from the stream until /// Returns a slice of the next bytes of buffered data from the stream until
/// `delimiter` is found, advancing the seek position. /// `delimiter` is found, advancing the seek position up to (but not past)
/// the delimiter.
/// ///
/// Returned slice excludes the delimiter. End-of-stream is treated equivalent /// Returned slice excludes the delimiter. End-of-stream is treated equivalent
/// to a delimiter, unless it would result in a length 0 return value, in which /// to a delimiter, unless it would result in a length 0 return value, in which
@ -800,20 +800,44 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
/// Invalidates previously returned values from `peek`. /// Invalidates previously returned values from `peek`.
/// ///
/// See also: /// See also:
/// * `takeDelimiter`
/// * `takeDelimiterInclusive` /// * `takeDelimiterInclusive`
/// * `peekDelimiterExclusive` /// * `peekDelimiterExclusive`
pub fn takeDelimiterExclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 { pub fn takeDelimiterExclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
const result = r.peekDelimiterInclusive(delimiter) catch |err| switch (err) { const result = try r.peekDelimiterExclusive(delimiter);
r.toss(result.len);
return result;
}
/// Returns a slice of the next bytes of buffered data from the stream until
/// `delimiter` is found, advancing the seek position past the delimiter.
///
/// Returned slice excludes the delimiter. End-of-stream is treated equivalent
/// to a delimiter, unless it would result in a length 0 return value, in which
/// case `null` is returned instead.
///
/// If the delimiter is not found within a number of bytes matching the
/// capacity of this `Reader`, `error.StreamTooLong` is returned. In
/// such case, the stream state is unmodified as if this function was never
/// called.
///
/// Invalidates previously returned values from `peek`.
///
/// See also:
/// * `takeDelimiterInclusive`
/// * `takeDelimiterExclusive`
pub fn takeDelimiter(r: *Reader, delimiter: u8) error{ ReadFailed, StreamTooLong }!?[]u8 {
const inclusive = r.peekDelimiterInclusive(delimiter) catch |err| switch (err) {
error.EndOfStream => { error.EndOfStream => {
const remaining = r.buffer[r.seek..r.end]; const remaining = r.buffer[r.seek..r.end];
if (remaining.len == 0) return error.EndOfStream; if (remaining.len == 0) return null;
r.toss(remaining.len); r.toss(remaining.len);
return remaining; return remaining;
}, },
else => |e| return e, else => |e| return e,
}; };
r.toss(result.len); r.toss(inclusive.len);
return result[0 .. result.len - 1]; return inclusive[0 .. inclusive.len - 1];
} }
/// Returns a slice of the next bytes of buffered data from the stream until /// Returns a slice of the next bytes of buffered data from the stream until
@ -1336,6 +1360,9 @@ test peekSentinel {
var r: Reader = .fixed("ab\nc"); var r: Reader = .fixed("ab\nc");
try testing.expectEqualStrings("ab", try r.peekSentinel('\n')); try testing.expectEqualStrings("ab", try r.peekSentinel('\n'));
try testing.expectEqualStrings("ab", try r.peekSentinel('\n')); try testing.expectEqualStrings("ab", try r.peekSentinel('\n'));
r.toss(3);
try testing.expectError(error.EndOfStream, r.peekSentinel('\n'));
try testing.expectEqualStrings("c", try r.peek(1));
} }
test takeDelimiterInclusive { test takeDelimiterInclusive {
@ -1350,22 +1377,52 @@ test peekDelimiterInclusive {
try testing.expectEqualStrings("ab\n", try r.peekDelimiterInclusive('\n')); try testing.expectEqualStrings("ab\n", try r.peekDelimiterInclusive('\n'));
r.toss(3); r.toss(3);
try testing.expectError(error.EndOfStream, r.peekDelimiterInclusive('\n')); try testing.expectError(error.EndOfStream, r.peekDelimiterInclusive('\n'));
try testing.expectEqualStrings("c", try r.peek(1));
} }
test takeDelimiterExclusive { test takeDelimiterExclusive {
var r: Reader = .fixed("ab\nc"); var r: Reader = .fixed("ab\nc");
try testing.expectEqualStrings("ab", try r.takeDelimiterExclusive('\n')); try testing.expectEqualStrings("ab", try r.takeDelimiterExclusive('\n'));
try testing.expectEqualStrings("", try r.takeDelimiterExclusive('\n'));
try testing.expectEqualStrings("", try r.takeDelimiterExclusive('\n'));
try testing.expectEqualStrings("\n", try r.take(1));
try testing.expectEqualStrings("c", try r.takeDelimiterExclusive('\n')); try testing.expectEqualStrings("c", try r.takeDelimiterExclusive('\n'));
try testing.expectError(error.EndOfStream, r.takeDelimiterExclusive('\n')); try testing.expectError(error.EndOfStream, r.takeDelimiterExclusive('\n'));
} }
test peekDelimiterExclusive { test peekDelimiterExclusive {
var r: Reader = .fixed("ab\nc"); var r: Reader = .fixed("ab\nc");
try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n')); try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n'));
try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n')); try testing.expectEqualStrings("ab", try r.peekDelimiterExclusive('\n'));
r.toss(3); r.toss(2);
try testing.expectEqualStrings("", try r.peekDelimiterExclusive('\n'));
try testing.expectEqualStrings("\n", try r.take(1));
try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n')); try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n'));
try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n')); try testing.expectEqualStrings("c", try r.peekDelimiterExclusive('\n'));
r.toss(1);
try testing.expectError(error.EndOfStream, r.peekDelimiterExclusive('\n'));
}
test takeDelimiter {
var r: Reader = .fixed("ab\nc\n\nd");
try testing.expectEqualStrings("ab", (try r.takeDelimiter('\n')).?);
try testing.expectEqualStrings("c", (try r.takeDelimiter('\n')).?);
try testing.expectEqualStrings("", (try r.takeDelimiter('\n')).?);
try testing.expectEqualStrings("d", (try r.takeDelimiter('\n')).?);
try testing.expectEqual(null, try r.takeDelimiter('\n'));
try testing.expectEqual(null, try r.takeDelimiter('\n'));
r = .fixed("ab\nc\n\nd\n"); // one trailing newline does not affect behavior
try testing.expectEqualStrings("ab", (try r.takeDelimiter('\n')).?);
try testing.expectEqualStrings("c", (try r.takeDelimiter('\n')).?);
try testing.expectEqualStrings("", (try r.takeDelimiter('\n')).?);
try testing.expectEqualStrings("d", (try r.takeDelimiter('\n')).?);
try testing.expectEqual(null, try r.takeDelimiter('\n'));
try testing.expectEqual(null, try r.takeDelimiter('\n'));
} }
test streamDelimiter { test streamDelimiter {

View File

@ -1393,7 +1393,7 @@ fn parseHosts(
br: *Io.Reader, br: *Io.Reader,
) error{ OutOfMemory, ReadFailed }!void { ) error{ OutOfMemory, ReadFailed }!void {
while (true) { while (true) {
const line = br.takeDelimiterExclusive('\n') catch |err| switch (err) { const line = br.takeDelimiter('\n') catch |err| switch (err) {
error.StreamTooLong => { error.StreamTooLong => {
// Skip lines that are too long. // Skip lines that are too long.
_ = br.discardDelimiterInclusive('\n') catch |e| switch (e) { _ = br.discardDelimiterInclusive('\n') catch |e| switch (e) {
@ -1403,7 +1403,8 @@ fn parseHosts(
continue; continue;
}, },
error.ReadFailed => return error.ReadFailed, error.ReadFailed => return error.ReadFailed,
error.EndOfStream => break, } orelse {
break; // end of stream
}; };
var split_it = mem.splitScalar(u8, line, '#'); var split_it = mem.splitScalar(u8, line, '#');
const no_comment_line = split_it.first(); const no_comment_line = split_it.first();

View File

@ -358,14 +358,11 @@ fn CpuinfoParser(comptime impl: anytype) type {
return struct { return struct {
fn parse(arch: Target.Cpu.Arch, reader: *std.Io.Reader) !?Target.Cpu { fn parse(arch: Target.Cpu.Arch, reader: *std.Io.Reader) !?Target.Cpu {
var obj: impl = .{}; var obj: impl = .{};
while (reader.takeDelimiterExclusive('\n')) |line| { while (try reader.takeDelimiter('\n')) |line| {
const colon_pos = mem.indexOfScalar(u8, line, ':') orelse continue; const colon_pos = mem.indexOfScalar(u8, line, ':') orelse continue;
const key = mem.trimEnd(u8, line[0..colon_pos], " \t"); const key = mem.trimEnd(u8, line[0..colon_pos], " \t");
const value = mem.trimStart(u8, line[colon_pos + 1 ..], " \t"); const value = mem.trimStart(u8, line[colon_pos + 1 ..], " \t");
if (!try obj.line_hook(key, value)) break; if (!try obj.line_hook(key, value)) break;
} else |err| switch (err) {
error.EndOfStream => {},
else => |e| return e,
} }
return obj.finalize(arch); return obj.finalize(arch);
} }