zig/lib/std/zig/string_literal.zig
Frank Denis 6c2e0c2046 Year++
2020-12-31 15:45:24 -08:00

130 lines
4.6 KiB
Zig

// SPDX-License-Identifier: MIT
// Copyright (c) 2015-2021 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
const std = @import("../std.zig");
const assert = std.debug.assert;
const State = enum {
Start,
Backslash,
};
pub const ParseError = error{
OutOfMemory,
/// When this is returned, index will be the position of the character.
InvalidCharacter,
};
/// caller owns returned memory
pub fn parse(
allocator: *std.mem.Allocator,
bytes: []const u8,
bad_index: *usize, // populated if error.InvalidCharacter is returned
) ParseError![]u8 {
assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"');
var list = std.ArrayList(u8).init(allocator);
errdefer list.deinit();
const slice = bytes[1..];
try list.ensureCapacity(slice.len - 1);
var state = State.Start;
var index: usize = 0;
while (index < slice.len) : (index += 1) {
const b = slice[index];
switch (state) {
State.Start => switch (b) {
'\\' => state = State.Backslash,
'\n' => {
bad_index.* = index;
return error.InvalidCharacter;
},
'"' => return list.toOwnedSlice(),
else => try list.append(b),
},
State.Backslash => switch (b) {
'n' => {
try list.append('\n');
state = State.Start;
},
'r' => {
try list.append('\r');
state = State.Start;
},
'\\' => {
try list.append('\\');
state = State.Start;
},
't' => {
try list.append('\t');
state = State.Start;
},
'\'' => {
try list.append('\'');
state = State.Start;
},
'"' => {
try list.append('"');
state = State.Start;
},
'x' => {
// TODO: add more/better/broader tests for this.
const index_continue = index + 3;
if (slice.len >= index_continue)
if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |char| {
try list.append(char);
state = State.Start;
index = index_continue - 1; // loop-header increments again
continue;
} else |_| {};
bad_index.* = index;
return error.InvalidCharacter;
},
'u' => {
// TODO: add more/better/broader tests for this.
if (slice.len > index + 2 and slice[index + 1] == '{')
if (std.mem.indexOfScalarPos(u8, slice[0..std.math.min(index + 9, slice.len)], index + 3, '}')) |index_end| {
const hex_str = slice[index + 2 .. index_end];
if (std.fmt.parseUnsigned(u32, hex_str, 16)) |uint| {
if (uint <= 0x10ffff) {
try list.appendSlice(std.mem.toBytes(uint)[0..]);
state = State.Start;
index = index_end; // loop-header increments
continue;
}
} else |_| {}
};
bad_index.* = index;
return error.InvalidCharacter;
},
else => {
bad_index.* = index;
return error.InvalidCharacter;
},
},
}
}
unreachable;
}
test "parse" {
const expect = std.testing.expect;
const eql = std.mem.eql;
var fixed_buf_mem: [32]u8 = undefined;
var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]);
var alloc = &fixed_buf_alloc.allocator;
var bad_index: usize = undefined;
expect(eql(u8, "foo", try parse(alloc, "\"foo\"", &bad_index)));
expect(eql(u8, "foo", try parse(alloc, "\"f\x6f\x6f\"", &bad_index)));
expect(eql(u8, "f💯", try parse(alloc, "\"f\u{1f4af}\"", &bad_index)));
}