diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 75cb4dd9d0..93d425b692 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -1575,8 +1575,8 @@ test "bswapAllFields" { /// If `delimiter_bytes` does not exist in buffer, /// the iterator will return `buffer`, null, in that order. /// See also the related function `split`. -pub fn tokenize(buffer: []const u8, delimiter_bytes: []const u8) TokenIterator { - return TokenIterator{ +pub fn tokenize(comptime T: type, buffer: []const T, delimiter_bytes: []const T) TokenIterator(T) { + return .{ .index = 0, .buffer = buffer, .delimiter_bytes = delimiter_bytes, @@ -1584,51 +1584,71 @@ pub fn tokenize(buffer: []const u8, delimiter_bytes: []const u8) TokenIterator { } test "mem.tokenize" { - var it = tokenize(" abc def ghi ", " "); + var it = tokenize(u8, " abc def ghi ", " "); try testing.expect(eql(u8, it.next().?, "abc")); try testing.expect(eql(u8, it.next().?, "def")); try testing.expect(eql(u8, it.next().?, "ghi")); try testing.expect(it.next() == null); - it = tokenize("..\\bob", "\\"); + it = tokenize(u8, "..\\bob", "\\"); try testing.expect(eql(u8, it.next().?, "..")); try testing.expect(eql(u8, "..", "..\\bob"[0..it.index])); try testing.expect(eql(u8, it.next().?, "bob")); try testing.expect(it.next() == null); - it = tokenize("//a/b", "/"); + it = tokenize(u8, "//a/b", "/"); try testing.expect(eql(u8, it.next().?, "a")); try testing.expect(eql(u8, it.next().?, "b")); try testing.expect(eql(u8, "//a/b", "//a/b"[0..it.index])); try testing.expect(it.next() == null); - it = tokenize("|", "|"); + it = tokenize(u8, "|", "|"); try testing.expect(it.next() == null); - it = tokenize("", "|"); + it = tokenize(u8, "", "|"); try testing.expect(it.next() == null); - it = tokenize("hello", ""); + it = tokenize(u8, "hello", ""); try testing.expect(eql(u8, it.next().?, "hello")); try testing.expect(it.next() == null); - it = tokenize("hello", " "); + it = tokenize(u8, "hello", " "); try testing.expect(eql(u8, it.next().?, "hello")); try testing.expect(it.next() == null); + + var it16 = tokenize( + u16, + std.unicode.utf8ToUtf16LeStringLiteral("hello"), + std.unicode.utf8ToUtf16LeStringLiteral(" "), + ); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("hello"))); + try testing.expect(it16.next() == null); } test "mem.tokenize (multibyte)" { - var it = tokenize("a|b,c/d e", " /,|"); + var it = tokenize(u8, "a|b,c/d e", " /,|"); try testing.expect(eql(u8, it.next().?, "a")); try testing.expect(eql(u8, it.next().?, "b")); try testing.expect(eql(u8, it.next().?, "c")); try testing.expect(eql(u8, it.next().?, "d")); try testing.expect(eql(u8, it.next().?, "e")); try testing.expect(it.next() == null); + + var it16 = tokenize( + u16, + std.unicode.utf8ToUtf16LeStringLiteral("a|b,c/d e"), + std.unicode.utf8ToUtf16LeStringLiteral(" /,|"), + ); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e"))); + try testing.expect(it16.next() == null); } test "mem.tokenize (reset)" { - var it = tokenize(" abc def ghi ", " "); + var it = tokenize(u8, " abc def ghi ", " "); try testing.expect(eql(u8, it.next().?, "abc")); try testing.expect(eql(u8, it.next().?, "def")); try testing.expect(eql(u8, it.next().?, "ghi")); @@ -1649,9 +1669,9 @@ test "mem.tokenize (reset)" { /// the iterator will return `buffer`, null, in that order. /// The delimiter length must not be zero. /// See also the related function `tokenize`. -pub fn split(buffer: []const u8, delimiter: []const u8) SplitIterator { +pub fn split(comptime T: type, buffer: []const T, delimiter: []const T) SplitIterator(T) { assert(delimiter.len != 0); - return SplitIterator{ + return .{ .index = 0, .buffer = buffer, .delimiter = delimiter, @@ -1661,35 +1681,55 @@ pub fn split(buffer: []const u8, delimiter: []const u8) SplitIterator { pub const separate = @compileError("deprecated: renamed to split (behavior remains unchanged)"); test "mem.split" { - var it = split("abc|def||ghi", "|"); + var it = split(u8, "abc|def||ghi", "|"); try testing.expect(eql(u8, it.next().?, "abc")); try testing.expect(eql(u8, it.next().?, "def")); try testing.expect(eql(u8, it.next().?, "")); try testing.expect(eql(u8, it.next().?, "ghi")); try testing.expect(it.next() == null); - it = split("", "|"); + it = split(u8, "", "|"); try testing.expect(eql(u8, it.next().?, "")); try testing.expect(it.next() == null); - it = split("|", "|"); + it = split(u8, "|", "|"); try testing.expect(eql(u8, it.next().?, "")); try testing.expect(eql(u8, it.next().?, "")); try testing.expect(it.next() == null); - it = split("hello", " "); + it = split(u8, "hello", " "); try testing.expect(eql(u8, it.next().?, "hello")); try testing.expect(it.next() == null); + + var it16 = split( + u16, + std.unicode.utf8ToUtf16LeStringLiteral("hello"), + std.unicode.utf8ToUtf16LeStringLiteral(" "), + ); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("hello"))); + try testing.expect(it16.next() == null); } test "mem.split (multibyte)" { - var it = split("a, b ,, c, d, e", ", "); + var it = split(u8, "a, b ,, c, d, e", ", "); try testing.expect(eql(u8, it.next().?, "a")); try testing.expect(eql(u8, it.next().?, "b ,")); try testing.expect(eql(u8, it.next().?, "c")); try testing.expect(eql(u8, it.next().?, "d")); try testing.expect(eql(u8, it.next().?, "e")); try testing.expect(it.next() == null); + + var it16 = split( + u16, + std.unicode.utf8ToUtf16LeStringLiteral("a, b ,, c, d, e"), + std.unicode.utf8ToUtf16LeStringLiteral(", "), + ); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b ,"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d"))); + try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e"))); + try testing.expect(it16.next() == null); } pub fn startsWith(comptime T: type, haystack: []const T, needle: []const T) bool { @@ -1710,75 +1750,83 @@ test "mem.endsWith" { try testing.expect(!endsWith(u8, "Bob", "Bo")); } -pub const TokenIterator = struct { - buffer: []const u8, - delimiter_bytes: []const u8, - index: usize, +pub fn TokenIterator(comptime T: type) type { + return struct { + buffer: []const T, + delimiter_bytes: []const T, + index: usize, - /// Returns a slice of the next token, or null if tokenization is complete. - pub fn next(self: *TokenIterator) ?[]const u8 { - // move to beginning of token - while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} - const start = self.index; - if (start == self.buffer.len) { - return null; - } + const Self = @This(); - // move to end of token - while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} - const end = self.index; - - return self.buffer[start..end]; - } - - /// Returns a slice of the remaining bytes. Does not affect iterator state. - pub fn rest(self: TokenIterator) []const u8 { - // move to beginning of token - var index: usize = self.index; - while (index < self.buffer.len and self.isSplitByte(self.buffer[index])) : (index += 1) {} - return self.buffer[index..]; - } - - /// Resets the iterator to the initial token. - pub fn reset(self: *TokenIterator) void { - self.index = 0; - } - - fn isSplitByte(self: TokenIterator, byte: u8) bool { - for (self.delimiter_bytes) |delimiter_byte| { - if (byte == delimiter_byte) { - return true; + /// Returns a slice of the next token, or null if tokenization is complete. + pub fn next(self: *Self) ?[]const T { + // move to beginning of token + while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} + const start = self.index; + if (start == self.buffer.len) { + return null; } + + // move to end of token + while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} + const end = self.index; + + return self.buffer[start..end]; } - return false; - } -}; -pub const SplitIterator = struct { - buffer: []const u8, - index: ?usize, - delimiter: []const u8, + /// Returns a slice of the remaining bytes. Does not affect iterator state. + pub fn rest(self: Self) []const T { + // move to beginning of token + var index: usize = self.index; + while (index < self.buffer.len and self.isSplitByte(self.buffer[index])) : (index += 1) {} + return self.buffer[index..]; + } - /// Returns a slice of the next field, or null if splitting is complete. - pub fn next(self: *SplitIterator) ?[]const u8 { - const start = self.index orelse return null; - const end = if (indexOfPos(u8, self.buffer, start, self.delimiter)) |delim_start| blk: { - self.index = delim_start + self.delimiter.len; - break :blk delim_start; - } else blk: { - self.index = null; - break :blk self.buffer.len; - }; - return self.buffer[start..end]; - } + /// Resets the iterator to the initial token. + pub fn reset(self: *Self) void { + self.index = 0; + } - /// Returns a slice of the remaining bytes. Does not affect iterator state. - pub fn rest(self: SplitIterator) []const u8 { - const end = self.buffer.len; - const start = self.index orelse end; - return self.buffer[start..end]; - } -}; + fn isSplitByte(self: Self, byte: T) bool { + for (self.delimiter_bytes) |delimiter_byte| { + if (byte == delimiter_byte) { + return true; + } + } + return false; + } + }; +} + +pub fn SplitIterator(comptime T: type) type { + return struct { + buffer: []const T, + index: ?usize, + delimiter: []const T, + + const Self = @This(); + + /// Returns a slice of the next field, or null if splitting is complete. + pub fn next(self: *Self) ?[]const T { + const start = self.index orelse return null; + const end = if (indexOfPos(T, self.buffer, start, self.delimiter)) |delim_start| blk: { + self.index = delim_start + self.delimiter.len; + break :blk delim_start; + } else blk: { + self.index = null; + break :blk self.buffer.len; + }; + return self.buffer[start..end]; + } + + /// Returns a slice of the remaining bytes. Does not affect iterator state. + pub fn rest(self: Self) []const T { + const end = self.buffer.len; + const start = self.index orelse end; + return self.buffer[start..end]; + } + }; +} /// Naively combines a series of slices with a separator. /// Allocates memory for the result, which must be freed by the caller.