Add std.unicode.Utf8Iterator.peek

This commit is contained in:
Michael Rees 2020-06-17 06:04:08 -05:00 committed by Andrew Kelley
parent 5ea0f589c9
commit bd17a373cc

View File

@ -235,6 +235,22 @@ pub const Utf8Iterator = struct {
else => unreachable,
}
}
/// Return the bytes spanning the next `n` codepoints, leaving the iterator
/// position unchanged.
/// When fewer than `n` codepoints remain, everything from the current position
/// to the end of `bytes` is returned instead.
pub fn peek(it: *Utf8Iterator, n: usize) []const u8 {
    const start = it.i;
    // Scanning below moves the iterator forward; rewind on every exit path.
    defer it.i = start;

    var stop = start;
    var remaining = n;
    while (remaining != 0) : (remaining -= 1) {
        if (it.nextCodepointSlice()) |codepoint| {
            stop += codepoint.len;
        } else {
            // Ran out of input before seeing `n` codepoints.
            return it.bytes[start..];
        }
    }

    return it.bytes[start..stop];
}
};
pub const Utf16LeIterator = struct {
@ -451,6 +467,31 @@ fn testMiscInvalidUtf8() void {
testValid("\xee\x80\x80", 0xe000);
}
test "utf8 iterator peeking" {
    // Run the same checks during compile-time evaluation...
    comptime {
        testUtf8Peeking();
    }
    // ...and again at runtime.
    testUtf8Peeking();
}
/// Checks `Utf8Iterator.peek` against the string "noël": peeking must return
/// the upcoming codepoints as a byte slice without moving the iterator.
fn testUtf8Peeking() void {
    const s = Utf8View.initComptime("noël");
    var it = s.iterator();

    // Consume the first codepoint so that peeking starts at "o".
    testing.expect(std.mem.eql(u8, "n", it.nextCodepointSlice().?));

    testing.expect(std.mem.eql(u8, "o", it.peek(1)));
    // Two codepoints ahead is "oë" (the "ë" is a multi-byte codepoint).
    testing.expect(std.mem.eql(u8, "oë", it.peek(2)));
    testing.expect(std.mem.eql(u8, "oël", it.peek(3)));
    // Asking for more codepoints than remain yields the rest of the string.
    testing.expect(std.mem.eql(u8, "oël", it.peek(4)));
    testing.expect(std.mem.eql(u8, "oël", it.peek(10)));

    // The peeks above must not have advanced the iterator.
    testing.expect(std.mem.eql(u8, "o", it.nextCodepointSlice().?));
    testing.expect(std.mem.eql(u8, "ë", it.nextCodepointSlice().?));
    testing.expect(std.mem.eql(u8, "l", it.nextCodepointSlice().?));
    testing.expect(it.nextCodepointSlice() == null);

    // Peeking at the end of input yields an empty slice.
    testing.expect(std.mem.eql(u8, &[_]u8{}, it.peek(1)));
}
/// Passes `bytes` to `testDecode` and expects the result to be the error
/// `expected_err`.
fn testError(bytes: []const u8, expected_err: anyerror) void {
    const decoded = testDecode(bytes);
    testing.expectError(expected_err, decoded);
}