diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index f1cf02609c..effa629f8b 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -1910,72 +1910,117 @@ test "byteSwapAllFields" {
     }, s);
 }
 
+/// Deprecated: use `tokenizeAny`, `tokenizeFull`, or `tokenizeScalar`
+pub const tokenize = tokenizeAny;
+
 /// Returns an iterator that iterates over the slices of `buffer` that are not
-/// any of the bytes in `delimiter_bytes`.
+/// any of the items in `delimiters`.
 ///
-/// `tokenize(u8, "   abc def     ghi  ", " ")` will return slices
+/// `tokenizeAny(u8, "   abc|def ||  ghi  ", " |")` will return slices
 /// for "abc", "def", "ghi", null, in that order.
 ///
 /// If `buffer` is empty, the iterator will return null.
-/// If `delimiter_bytes` does not exist in buffer,
+/// If none of `delimiters` exist in buffer,
 /// the iterator will return `buffer`, null, in that order.
 ///
-/// See also: `split` and `splitBackwards`.
-pub fn tokenize(comptime T: type, buffer: []const T, delimiter_bytes: []const T) TokenIterator(T) {
+/// See also: `tokenizeFull`, `tokenizeScalar`,
+/// `splitFull`, `splitAny`, `splitScalar`,
+/// `splitBackwardsFull`, `splitBackwardsAny`, and `splitBackwardsScalar`.
+pub fn tokenizeAny(comptime T: type, buffer: []const T, delimiters: []const T) TokenIterator(T, .any) {
     return .{
         .index = 0,
         .buffer = buffer,
-        .delimiter_bytes = delimiter_bytes,
+        .delimiter = delimiters,
     };
 }
 
-test "tokenize" {
-    var it = tokenize(u8, "   abc def   ghi  ", " ");
-    try testing.expect(eql(u8, it.next().?, "abc"));
-    try testing.expect(eql(u8, it.peek().?, "def"));
-    try testing.expect(eql(u8, it.next().?, "def"));
-    try testing.expect(eql(u8, it.next().?, "ghi"));
+/// Returns an iterator that iterates over the slices of `buffer` that are not
+/// the sequence in `delimiter`.
+///
+/// `tokenizeFull(u8, "<>abc><>ghi", "<>")` will return slices
+/// for "abc>", "ghi", null, in that order.
+///
+/// If `buffer` is empty, the iterator will return null.
+/// If `delimiter` does not exist in buffer,
+/// the iterator will return `buffer`, null, in that order.
+/// The delimiter length must not be zero.
+///
+/// See also: `tokenizeAny`, `tokenizeScalar`,
+/// `splitFull`, `splitAny`, `splitScalar`,
+/// `splitBackwardsFull`, `splitBackwardsAny`, and `splitBackwardsScalar`.
+pub fn tokenizeFull(comptime T: type, buffer: []const T, delimiter: []const T) TokenIterator(T, .full) {
+    assert(delimiter.len != 0);
+    return .{
+        .index = 0,
+        .buffer = buffer,
+        .delimiter = delimiter,
+    };
+}
+
+/// Returns an iterator that iterates over the slices of `buffer` that are not
+/// `delimiter`.
+///
+/// `tokenizeScalar(u8, "   abc def     ghi  ", ' ')` will return slices
+/// for "abc", "def", "ghi", null, in that order.
+///
+/// If `buffer` is empty, the iterator will return null.
+/// If `delimiter` does not exist in buffer,
+/// the iterator will return `buffer`, null, in that order.
+///
+/// See also: `tokenizeAny`, `tokenizeFull`,
+/// `splitFull`, `splitAny`, `splitScalar`,
+/// `splitBackwardsFull`, `splitBackwardsAny`, and `splitBackwardsScalar`.
+pub fn tokenizeScalar(comptime T: type, buffer: []const T, delimiter: T) TokenIterator(T, .scalar) {
+    return .{
+        .index = 0,
+        .buffer = buffer,
+        .delimiter = delimiter,
+    };
+}
+
+test "tokenizeFull" {
+    var it = tokenizeFull(u8, "a<>b<><>c><>d><", "<>");
+    try testing.expectEqualStrings("a", it.next().?);
+    try testing.expectEqualStrings("b", it.peek().?);
+    try testing.expectEqualStrings("b", it.next().?);
+    try testing.expectEqualStrings("c>", it.next().?);
+    try testing.expectEqualStrings("d><", it.next().?);
     try testing.expect(it.next() == null);
+    try testing.expect(it.peek() == null);
+
+    var it16 = tokenizeFull(
+        u16,
+        std.unicode.utf8ToUtf16LeStringLiteral("a<>b<><>c><>d><"),
+        std.unicode.utf8ToUtf16LeStringLiteral("<>"),
+    );
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c>")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d><")));
+    try testing.expect(it16.next() == null);
+}
+
+test "tokenize (reset)" {
+    {
+        var it = tokenizeAny(u8, "   abc def   ghi  ", " ");
+        try testing.expect(eql(u8, it.next().?, "abc"));
+        try testing.expect(eql(u8, it.next().?, "def"));
+        try testing.expect(eql(u8, it.next().?, "ghi"));
+
+        it.reset();
+
+        try testing.expect(eql(u8, it.next().?, "abc"));
+        try testing.expect(eql(u8, it.next().?, "def"));
+        try testing.expect(eql(u8, it.next().?, "ghi"));
+        try testing.expect(it.next() == null);
+    }
+    {
+        var it = tokenizeFull(u8, "<><>abc<>def<><>ghi<>", "<>");
+        try testing.expect(eql(u8, it.next().?, "abc"));
+        try testing.expect(eql(u8, it.next().?, "def"));
+        try testing.expect(eql(u8, it.next().?, "ghi"));
+
+        it.reset();
+
+        try testing.expect(eql(u8, it.next().?, "abc"));
+        try testing.expect(eql(u8, it.next().?, "def"));
+        try testing.expect(eql(u8, it.next().?, "ghi"));
+        try testing.expect(it.next() == null);
+    }
+    {
+        var it = tokenizeScalar(u8, "   abc def   ghi  ", ' ');
+        try testing.expect(eql(u8, it.next().?, "abc"));
+        try testing.expect(eql(u8, it.next().?, "def"));
+        try testing.expect(eql(u8, it.next().?, "ghi"));
+
+        it.reset();
+
+        try testing.expect(eql(u8, it.next().?, "abc"));
+        try testing.expect(eql(u8, it.next().?, "def"));
+        try testing.expect(eql(u8, it.next().?, "ghi"));
+        try testing.expect(it.next() == null);
+    }
 }
 
 /// Deprecated: use `splitFull`, `splitAny`, or `splitScalar`
@@ -2026,8 +2125,8 @@ pub const split = splitFull;
 /// The delimiter length must not be zero.
 ///
 /// See also: `splitAny`, `splitScalar`, `splitBackwardsFull`,
-/// `splitBackwardsAny`,`splitBackwardsScalar`, and
-/// `tokenize`.
+/// `splitBackwardsAny`, `splitBackwardsScalar`,
+/// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
 pub fn splitFull(comptime T: type, buffer: []const T, delimiter: []const T) SplitIterator(T, .full) {
     assert(delimiter.len != 0);
     return .{
@@ -2047,8 +2146,8 @@ pub fn splitFull(comptime T: type, buffer: []const T, delimiter: []const T) Spli
 /// the iterator will return `buffer`, null, in that order.
 ///
 /// See also: `splitFull`, `splitScalar`, `splitBackwardsFull`,
-/// `splitBackwardsAny`,`splitBackwardsScalar`, and
-/// `tokenize`.
+/// `splitBackwardsAny`, `splitBackwardsScalar`,
+/// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
 pub fn splitAny(comptime T: type, buffer: []const T, delimiters: []const T) SplitIterator(T, .any) {
     return .{
         .index = 0,
@@ -2067,8 +2166,8 @@ pub fn splitAny(comptime T: type, buffer: []const T, delimiters: []const T) Spli
 /// the iterator will return `buffer`, null, in that order.
 ///
 /// See also: `splitFull`, `splitAny`, `splitBackwardsFull`,
-/// `splitBackwardsAny`,`splitBackwardsScalar`, and
-/// `tokenize`.
+/// `splitBackwardsAny`, `splitBackwardsScalar`,
+/// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
 pub fn splitScalar(comptime T: type, buffer: []const T, delimiter: T) SplitIterator(T, .scalar) {
     return .{
         .index = 0,
@@ -2224,8 +2323,8 @@ pub const splitBackwards = splitBackwardsFull;
 /// The delimiter length must not be zero.
 ///
 /// See also: `splitBackwardsAny`, `splitBackwardsScalar`,
-/// `splitFull`, `splitAny`,`splitScalar`, and
-/// `tokenize`.
+/// `splitFull`, `splitAny`, `splitScalar`,
+/// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
 pub fn splitBackwardsFull(comptime T: type, buffer: []const T, delimiter: []const T) SplitBackwardsIterator(T, .full) {
     assert(delimiter.len != 0);
     return .{
@@ -2245,8 +2344,8 @@ pub fn splitBackwardsFull(comptime T: type, buffer: []const T, delimiter: []cons
 /// the iterator will return `buffer`, null, in that order.
 ///
 /// See also: `splitBackwardsFull`, `splitBackwardsScalar`,
-/// `splitFull`, `splitAny`,`splitScalar`, and
-/// `tokenize`.
+/// `splitFull`, `splitAny`, `splitScalar`,
+/// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
 pub fn splitBackwardsAny(comptime T: type, buffer: []const T, delimiters: []const T) SplitBackwardsIterator(T, .any) {
     return .{
         .index = buffer.len,
@@ -2265,8 +2364,8 @@ pub fn splitBackwardsAny(comptime T: type, buffer: []const T, delimiters: []cons
 /// the iterator will return `buffer`, null, in that order.
 ///
 /// See also: `splitBackwardsFull`, `splitBackwardsAny`,
-/// `splitFull`, `splitAny`,`splitScalar`, and
-/// `tokenize`.
+/// `splitFull`, `splitAny`, `splitScalar`,
+/// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
 pub fn splitBackwardsScalar(comptime T: type, buffer: []const T, delimiter: T) SplitBackwardsIterator(T, .scalar) {
     return .{
         .index = buffer.len,
@@ -2596,10 +2695,13 @@ test "endsWith" {
 pub const DelimiterType = enum { full, any, scalar };
 
-pub fn TokenIterator(comptime T: type) type {
+pub fn TokenIterator(comptime T: type, comptime delimiter_type: DelimiterType) type {
     return struct {
         buffer: []const T,
-        delimiter_bytes: []const T,
+        delimiter: switch (delimiter_type) {
+            .full, .any => []const T,
+            .scalar => T,
+        },
         index: usize,
 
         const Self = @This();
 
@@ -2616,7 +2718,10 @@ pub fn TokenIterator(comptime T: type) type {
         /// complete. Does not advance to the next token.
        pub fn peek(self: *Self) ?[]const T {
             // move to beginning of token
-            while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
+            while (self.index < self.buffer.len and self.isDelimiter(self.index)) : (self.index += switch (delimiter_type) {
+                .full => self.delimiter.len,
+                .any, .scalar => 1,
+            }) {}
             const start = self.index;
             if (start == self.buffer.len) {
                 return null;
@@ -2624,7 +2729,7 @@ pub fn TokenIterator(comptime T: type) type {
             // move to end of token
             var end = start;
-            while (end < self.buffer.len and !self.isSplitByte(self.buffer[end])) : (end += 1) {}
+            while (end < self.buffer.len and !self.isDelimiter(end)) : (end += 1) {}
 
             return self.buffer[start..end];
         }
 
@@ -2633,7 +2738,10 @@ pub fn TokenIterator(comptime T: type) type {
         pub fn rest(self: Self) []const T {
             // move to beginning of token
             var index: usize = self.index;
-            while (index < self.buffer.len and self.isSplitByte(self.buffer[index])) : (index += 1) {}
+            while (index < self.buffer.len and self.isDelimiter(index)) : (index += switch (delimiter_type) {
+                .full => self.delimiter.len,
+                .any, .scalar => 1,
+            }) {}
             return self.buffer[index..];
         }
 
@@ -2642,13 +2750,20 @@ pub fn TokenIterator(comptime T: type) type {
             self.index = 0;
         }
 
-        fn isSplitByte(self: Self, byte: T) bool {
-            for (self.delimiter_bytes) |delimiter_byte| {
-                if (byte == delimiter_byte) {
-                    return true;
-                }
+        fn isDelimiter(self: Self, index: usize) bool {
+            switch (delimiter_type) {
+                .full => return startsWith(T, self.buffer[index..], self.delimiter),
+                .any => {
+                    const item = self.buffer[index];
+                    for (self.delimiter) |delimiter_item| {
+                        if (item == delimiter_item) {
+                            return true;
+                        }
+                    }
+                    return false;
+                },
+                .scalar => return self.buffer[index] == self.delimiter,
             }
-            return false;
         }
     };
 }
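
A minimal usage sketch of the three tokenize variants introduced above (not part of the patch; it assumes the patched std.mem and follows the iterator semantics shown in the diff):

    const std = @import("std");

    test "tokenize variants" {
        // .any: every item in the set " ," delimits, so spaces and commas
        // are interchangeable here.
        var any_it = std.mem.tokenizeAny(u8, "a b,c", " ,");
        try std.testing.expectEqualStrings("a", any_it.next().?);
        try std.testing.expectEqualStrings("b", any_it.next().?);
        try std.testing.expectEqualStrings("c", any_it.next().?);

        // .full: only the exact sequence "<>" delimits; "><" does not.
        var full_it = std.mem.tokenizeFull(u8, "a<>b><c", "<>");
        try std.testing.expectEqualStrings("a", full_it.next().?);
        try std.testing.expectEqualStrings("b><c", full_it.next().?);

        // .scalar: a single item delimits; runs of it are skipped, so the
        // tokenizer never yields empty slices (unlike the split family).
        var scalar_it = std.mem.tokenizeScalar(u8, "a::b", ':');
        try std.testing.expectEqualStrings("a", scalar_it.next().?);
        try std.testing.expectEqualStrings("b", scalar_it.next().?);
        try std.testing.expect(scalar_it.next() == null);
    }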
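
The TokenIterator rework relies on selecting a field's type at comptime: TokenIterator(T, .scalar) stores `delimiter: T`, while .any and .full store `delimiter: []const T`, so each instantiation carries only the representation it needs. A stripped-down sketch of that pattern (the `Kind`/`Holder` names are hypothetical, not from the patch):

    const std = @import("std");

    const Kind = enum { many, one };

    fn Holder(comptime T: type, comptime kind: Kind) type {
        return struct {
            // The field's type is chosen when the generic type is
            // instantiated, just like `delimiter` in TokenIterator.
            value: switch (kind) {
                .many => []const T,
                .one => T,
            },
        };
    }

    test "comptime-selected field type" {
        const many = Holder(u8, .many){ .value = "abc" };
        const one = Holder(u8, .one){ .value = 'x' };
        try std.testing.expectEqual(@as(usize, 3), many.value.len);
        try std.testing.expectEqual(@as(u8, 'x'), one.value);
    }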