mirror of https://github.com/ziglang/zig.git
Make mem.split and mem.tokenize generic instead of assuming u8
This allows these functions to work on slices of u16, etc.
parent ea7bdeb67d
commit 05fd20dc10
lib/std/mem.zig: 208 changed lines
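Before the diff itself, a minimal sketch (not part of the commit) of the new call style: the element type is now passed as a leading comptime argument, so the same functions work on u8 slices and, for example, u16 slices. The test name and variable names are illustrative; the calls mirror the API shown in the diff below.

    const std = @import("std");
    const mem = std.mem;
    const testing = std.testing;

    test "tokenize/split with an explicit element type" {
        // The element type is now the first, comptime argument.
        var it = mem.tokenize(u8, "  hello   world ", " ");
        try testing.expect(mem.eql(u8, it.next().?, "hello"));
        try testing.expect(mem.eql(u8, it.next().?, "world"));
        try testing.expect(it.next() == null);

        // The same functions work on non-u8 slices, e.g. UTF-16LE data.
        const L = std.unicode.utf8ToUtf16LeStringLiteral;
        var it16 = mem.split(u16, L("a b"), L(" "));
        try testing.expect(mem.eql(u16, it16.next().?, L("a")));
        try testing.expect(mem.eql(u16, it16.next().?, L("b")));
        try testing.expect(it16.next() == null);
    }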
@@ -1575,8 +1575,8 @@ test "bswapAllFields" {
 /// If `delimiter_bytes` does not exist in buffer,
 /// the iterator will return `buffer`, null, in that order.
 /// See also the related function `split`.
-pub fn tokenize(buffer: []const u8, delimiter_bytes: []const u8) TokenIterator {
-    return TokenIterator{
+pub fn tokenize(comptime T: type, buffer: []const T, delimiter_bytes: []const T) TokenIterator(T) {
+    return .{
         .index = 0,
         .buffer = buffer,
         .delimiter_bytes = delimiter_bytes,
@@ -1584,51 +1584,71 @@ pub fn tokenize(buffer: []const u8, delimiter_bytes: []const u8) TokenIterator {
 }
 
 test "mem.tokenize" {
-    var it = tokenize(" abc def ghi ", " ");
+    var it = tokenize(u8, " abc def ghi ", " ");
     try testing.expect(eql(u8, it.next().?, "abc"));
     try testing.expect(eql(u8, it.next().?, "def"));
     try testing.expect(eql(u8, it.next().?, "ghi"));
     try testing.expect(it.next() == null);
 
-    it = tokenize("..\\bob", "\\");
+    it = tokenize(u8, "..\\bob", "\\");
     try testing.expect(eql(u8, it.next().?, ".."));
     try testing.expect(eql(u8, "..", "..\\bob"[0..it.index]));
     try testing.expect(eql(u8, it.next().?, "bob"));
     try testing.expect(it.next() == null);
 
-    it = tokenize("//a/b", "/");
+    it = tokenize(u8, "//a/b", "/");
     try testing.expect(eql(u8, it.next().?, "a"));
     try testing.expect(eql(u8, it.next().?, "b"));
     try testing.expect(eql(u8, "//a/b", "//a/b"[0..it.index]));
     try testing.expect(it.next() == null);
 
-    it = tokenize("|", "|");
+    it = tokenize(u8, "|", "|");
     try testing.expect(it.next() == null);
 
-    it = tokenize("", "|");
+    it = tokenize(u8, "", "|");
     try testing.expect(it.next() == null);
 
-    it = tokenize("hello", "");
+    it = tokenize(u8, "hello", "");
     try testing.expect(eql(u8, it.next().?, "hello"));
     try testing.expect(it.next() == null);
 
-    it = tokenize("hello", " ");
+    it = tokenize(u8, "hello", " ");
     try testing.expect(eql(u8, it.next().?, "hello"));
     try testing.expect(it.next() == null);
+
+    var it16 = tokenize(
+        u16,
+        std.unicode.utf8ToUtf16LeStringLiteral("hello"),
+        std.unicode.utf8ToUtf16LeStringLiteral(" "),
+    );
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("hello")));
+    try testing.expect(it16.next() == null);
 }
 
 test "mem.tokenize (multibyte)" {
-    var it = tokenize("a|b,c/d e", " /,|");
+    var it = tokenize(u8, "a|b,c/d e", " /,|");
     try testing.expect(eql(u8, it.next().?, "a"));
     try testing.expect(eql(u8, it.next().?, "b"));
     try testing.expect(eql(u8, it.next().?, "c"));
     try testing.expect(eql(u8, it.next().?, "d"));
     try testing.expect(eql(u8, it.next().?, "e"));
     try testing.expect(it.next() == null);
+
+    var it16 = tokenize(
+        u16,
+        std.unicode.utf8ToUtf16LeStringLiteral("a|b,c/d e"),
+        std.unicode.utf8ToUtf16LeStringLiteral(" /,|"),
+    );
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e")));
+    try testing.expect(it16.next() == null);
 }
 
 test "mem.tokenize (reset)" {
-    var it = tokenize(" abc def ghi ", " ");
+    var it = tokenize(u8, " abc def ghi ", " ");
     try testing.expect(eql(u8, it.next().?, "abc"));
     try testing.expect(eql(u8, it.next().?, "def"));
     try testing.expect(eql(u8, it.next().?, "ghi"));
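The "mem.tokenize (reset)" test above is truncated by the hunk boundary. For reference, a small illustrative sketch (not part of the commit) of how the iterator's rest() and reset() methods, defined further down in this diff, behave with the new signature:

    const std = @import("std");
    const mem = std.mem;
    const testing = std.testing;

    test "rest() and reset() on the generic TokenIterator" {
        var it = mem.tokenize(u8, "abc def ghi", " ");
        try testing.expect(mem.eql(u8, it.next().?, "abc"));
        // rest() peeks at everything after the current position without advancing.
        try testing.expect(mem.eql(u8, it.rest(), "def ghi"));
        // reset() rewinds the iterator to the first token.
        it.reset();
        try testing.expect(mem.eql(u8, it.next().?, "abc"));
    }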
@@ -1649,9 +1669,9 @@ test "mem.tokenize (reset)" {
 /// the iterator will return `buffer`, null, in that order.
 /// The delimiter length must not be zero.
 /// See also the related function `tokenize`.
-pub fn split(buffer: []const u8, delimiter: []const u8) SplitIterator {
+pub fn split(comptime T: type, buffer: []const T, delimiter: []const T) SplitIterator(T) {
     assert(delimiter.len != 0);
-    return SplitIterator{
+    return .{
         .index = 0,
         .buffer = buffer,
         .delimiter = delimiter,
@@ -1661,35 +1681,55 @@ pub fn split(buffer: []const u8, delimiter: []const u8) SplitIterator {
 pub const separate = @compileError("deprecated: renamed to split (behavior remains unchanged)");
 
 test "mem.split" {
-    var it = split("abc|def||ghi", "|");
+    var it = split(u8, "abc|def||ghi", "|");
     try testing.expect(eql(u8, it.next().?, "abc"));
     try testing.expect(eql(u8, it.next().?, "def"));
     try testing.expect(eql(u8, it.next().?, ""));
     try testing.expect(eql(u8, it.next().?, "ghi"));
     try testing.expect(it.next() == null);
 
-    it = split("", "|");
+    it = split(u8, "", "|");
     try testing.expect(eql(u8, it.next().?, ""));
     try testing.expect(it.next() == null);
 
-    it = split("|", "|");
+    it = split(u8, "|", "|");
     try testing.expect(eql(u8, it.next().?, ""));
     try testing.expect(eql(u8, it.next().?, ""));
     try testing.expect(it.next() == null);
 
-    it = split("hello", " ");
+    it = split(u8, "hello", " ");
     try testing.expect(eql(u8, it.next().?, "hello"));
     try testing.expect(it.next() == null);
+
+    var it16 = split(
+        u16,
+        std.unicode.utf8ToUtf16LeStringLiteral("hello"),
+        std.unicode.utf8ToUtf16LeStringLiteral(" "),
+    );
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("hello")));
+    try testing.expect(it16.next() == null);
 }
 
 test "mem.split (multibyte)" {
-    var it = split("a, b ,, c, d, e", ", ");
+    var it = split(u8, "a, b ,, c, d, e", ", ");
     try testing.expect(eql(u8, it.next().?, "a"));
     try testing.expect(eql(u8, it.next().?, "b ,"));
     try testing.expect(eql(u8, it.next().?, "c"));
     try testing.expect(eql(u8, it.next().?, "d"));
     try testing.expect(eql(u8, it.next().?, "e"));
     try testing.expect(it.next() == null);
+
+    var it16 = split(
+        u16,
+        std.unicode.utf8ToUtf16LeStringLiteral("a, b ,, c, d, e"),
+        std.unicode.utf8ToUtf16LeStringLiteral(", "),
+    );
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b ,")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d")));
+    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e")));
+    try testing.expect(it16.next() == null);
 }
 
 pub fn startsWith(comptime T: type, haystack: []const T, needle: []const T) bool {
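The tests above also show the behavioral difference between the two functions: split matches its delimiter as a whole sequence and yields empty fields, while tokenize treats its second argument as a set of single-element delimiters and skips runs of them. A short illustrative sketch (not part of the commit) making that contrast explicit with the new signatures:

    const std = @import("std");
    const mem = std.mem;
    const testing = std.testing;

    test "split keeps empty fields, tokenize skips delimiter runs" {
        // split: the "||" in the middle produces an empty field.
        var sp = mem.split(u8, "abc|def||ghi", "|");
        try testing.expect(mem.eql(u8, sp.next().?, "abc"));
        try testing.expect(mem.eql(u8, sp.next().?, "def"));
        try testing.expect(mem.eql(u8, sp.next().?, ""));
        try testing.expect(mem.eql(u8, sp.next().?, "ghi"));
        try testing.expect(sp.next() == null);

        // tokenize: consecutive delimiters are collapsed, so no empty tokens.
        var tk = mem.tokenize(u8, "abc|def||ghi", "|");
        try testing.expect(mem.eql(u8, tk.next().?, "abc"));
        try testing.expect(mem.eql(u8, tk.next().?, "def"));
        try testing.expect(mem.eql(u8, tk.next().?, "ghi"));
        try testing.expect(tk.next() == null);
    }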
@@ -1710,75 +1750,83 @@ test "mem.endsWith" {
     try testing.expect(!endsWith(u8, "Bob", "Bo"));
 }
 
-pub const TokenIterator = struct {
-    buffer: []const u8,
-    delimiter_bytes: []const u8,
-    index: usize,
-
-    /// Returns a slice of the next token, or null if tokenization is complete.
-    pub fn next(self: *TokenIterator) ?[]const u8 {
-        // move to beginning of token
-        while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
-        const start = self.index;
-        if (start == self.buffer.len) {
-            return null;
-        }
-
-        // move to end of token
-        while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
-        const end = self.index;
-
-        return self.buffer[start..end];
-    }
-
-    /// Returns a slice of the remaining bytes. Does not affect iterator state.
-    pub fn rest(self: TokenIterator) []const u8 {
-        // move to beginning of token
-        var index: usize = self.index;
-        while (index < self.buffer.len and self.isSplitByte(self.buffer[index])) : (index += 1) {}
-        return self.buffer[index..];
-    }
-
-    /// Resets the iterator to the initial token.
-    pub fn reset(self: *TokenIterator) void {
-        self.index = 0;
-    }
-
-    fn isSplitByte(self: TokenIterator, byte: u8) bool {
-        for (self.delimiter_bytes) |delimiter_byte| {
-            if (byte == delimiter_byte) {
-                return true;
-            }
-        }
-        return false;
-    }
-};
-
-pub const SplitIterator = struct {
-    buffer: []const u8,
-    index: ?usize,
-    delimiter: []const u8,
-
-    /// Returns a slice of the next field, or null if splitting is complete.
-    pub fn next(self: *SplitIterator) ?[]const u8 {
-        const start = self.index orelse return null;
-        const end = if (indexOfPos(u8, self.buffer, start, self.delimiter)) |delim_start| blk: {
-            self.index = delim_start + self.delimiter.len;
-            break :blk delim_start;
-        } else blk: {
-            self.index = null;
-            break :blk self.buffer.len;
-        };
-        return self.buffer[start..end];
-    }
-
-    /// Returns a slice of the remaining bytes. Does not affect iterator state.
-    pub fn rest(self: SplitIterator) []const u8 {
-        const end = self.buffer.len;
-        const start = self.index orelse end;
-        return self.buffer[start..end];
-    }
-};
+pub fn TokenIterator(comptime T: type) type {
+    return struct {
+        buffer: []const T,
+        delimiter_bytes: []const T,
+        index: usize,
+
+        const Self = @This();
+
+        /// Returns a slice of the next token, or null if tokenization is complete.
+        pub fn next(self: *Self) ?[]const T {
+            // move to beginning of token
+            while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
+            const start = self.index;
+            if (start == self.buffer.len) {
+                return null;
+            }
+
+            // move to end of token
+            while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
+            const end = self.index;
+
+            return self.buffer[start..end];
+        }
+
+        /// Returns a slice of the remaining bytes. Does not affect iterator state.
+        pub fn rest(self: Self) []const T {
+            // move to beginning of token
+            var index: usize = self.index;
+            while (index < self.buffer.len and self.isSplitByte(self.buffer[index])) : (index += 1) {}
+            return self.buffer[index..];
+        }
+
+        /// Resets the iterator to the initial token.
+        pub fn reset(self: *Self) void {
+            self.index = 0;
+        }
+
+        fn isSplitByte(self: Self, byte: T) bool {
+            for (self.delimiter_bytes) |delimiter_byte| {
+                if (byte == delimiter_byte) {
+                    return true;
+                }
+            }
+            return false;
+        }
+    };
+}
+
+pub fn SplitIterator(comptime T: type) type {
+    return struct {
+        buffer: []const T,
+        index: ?usize,
+        delimiter: []const T,
+
+        const Self = @This();
+
+        /// Returns a slice of the next field, or null if splitting is complete.
+        pub fn next(self: *Self) ?[]const T {
+            const start = self.index orelse return null;
+            const end = if (indexOfPos(T, self.buffer, start, self.delimiter)) |delim_start| blk: {
+                self.index = delim_start + self.delimiter.len;
+                break :blk delim_start;
+            } else blk: {
+                self.index = null;
+                break :blk self.buffer.len;
+            };
+            return self.buffer[start..end];
+        }
+
+        /// Returns a slice of the remaining bytes. Does not affect iterator state.
+        pub fn rest(self: Self) []const T {
+            const end = self.buffer.len;
+            const start = self.index orelse end;
+            return self.buffer[start..end];
+        }
+    };
+}
 
 /// Naively combines a series of slices with a separator.
 /// Allocates memory for the result, which must be freed by the caller.
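Since TokenIterator and SplitIterator are now functions returning a type, code that names the iterator type (struct fields, function parameters) must spell out the element type, e.g. mem.TokenIterator(u8). A minimal sketch of that, not part of the commit; countTokens is a hypothetical helper:

    const std = @import("std");
    const mem = std.mem;
    const testing = std.testing;

    // Hypothetical helper: consumes any u8 token iterator and counts its tokens.
    // The parameter type is spelled with an explicit element type.
    fn countTokens(it: *mem.TokenIterator(u8)) usize {
        var n: usize = 0;
        while (it.next()) |_| n += 1;
        return n;
    }

    test "passing a TokenIterator(u8) to a function" {
        var it = mem.tokenize(u8, "a b  c", " ");
        try testing.expect(countTokens(&it) == 3);
    }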