mirror of
https://github.com/ziglang/zig.git
synced 2026-01-17 12:55:16 +00:00
We already have a LICENSE file that covers the Zig Standard Library. We no longer need to remind everyone that the license is MIT in every single file. Previously this was introduced to clarify the situation for a fork of Zig that made Zig's LICENSE file harder to find, and replaced it with their own license that required annual payments to their company. However that fork now appears to be dead. So there is no need to reinforce the copyright notice in every single file.
156 lines
6.0 KiB
Zig
156 lines
6.0 KiB
Zig
const std = @import("../std.zig");
|
|
const assert = std.debug.assert;
|
|
|
|
/// Error set returned by `parseAlloc`: either the allocator ran out of memory
/// or `bytes` was not a valid string literal.
pub const ParseError = error{ OutOfMemory, InvalidStringLiteral };
|
|
|
|
/// Outcome of `parseAppend`.
///
/// Every payload is a byte index into `bytes[1..]`, i.e. it is counted from the
/// first byte after the opening quote of the literal, not from the quote itself.
pub const Result = union(enum) {
    /// The closing quote was reached and the whole literal was decoded.
    success,

    /// Found an invalid character at this index: a raw newline inside the
    /// literal, or an unrecognized character following a backslash.
    invalid_character: usize,

    /// Expected hex digits at this index. Reported for a `\x` escape that is
    /// too close to the end of the literal; the index is that of the `x`.
    expected_hex_digits: usize,

    /// Invalid hex digits at this index. Reported for a `\x` escape whose two
    /// following bytes are not hex digits; the index is that of the first one.
    invalid_hex_escape: usize,

    /// Invalid unicode escape at this index. Reported for a `\u{...}` escape
    /// whose contents are not valid; the index is that of the `{`.
    invalid_unicode_escape: usize,

    /// The left brace at this index is missing a matching right brace.
    missing_matching_rbrace: usize,

    /// Expected unicode digits at this index. Reported for a `\u` that is not
    /// followed by `{`; the index is that of the `u`.
    expected_unicode_digits: usize,
};
|
|
|
|
/// Parses `bytes` as a Zig string literal and appends the decoded contents to `buf`.
/// Asserts `bytes` has '"' at beginning and end.
///
/// Returns `Result.success` once the closing quote is reached. Otherwise returns
/// the `Result` variant describing the first problem found; its payload is an
/// index into `bytes[1..]` (counted from the first byte after the opening
/// quote). In that case `buf` keeps whatever was decoded before the problem.
///
/// `\u{...}` escapes are appended as the UTF-8 encoding of the code point.
/// Code points above U+10FFFF and surrogate halves are reported as
/// `invalid_unicode_escape`.
///
/// Returns `error.OutOfMemory` if `buf` cannot grow; `buf` is then shrunk back
/// to the length it had on entry.
pub fn parseAppend(buf: *std.ArrayList(u8), bytes: []const u8) error{OutOfMemory}!Result {
    assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"');
    // Everything after the opening quote, including the closing quote.
    const slice = bytes[1..];

    const prev_len = buf.items.len;
    try buf.ensureCapacity(prev_len + slice.len - 1);
    errdefer buf.shrinkRetainingCapacity(prev_len);

    const State = enum {
        Start,
        Backslash,
    };

    var state = State.Start;
    var index: usize = 0;
    // Bounded loop: an escaped quote in the final position (e.g. `"\"`) consumes
    // the byte that the entry assertion took for the closing quote, so the scan
    // can reach the end of `slice` without ever seeing a terminator.
    while (index < slice.len) : (index += 1) {
        const b = slice[index];

        switch (state) {
            State.Start => switch (b) {
                '\\' => state = State.Backslash,
                '\n' => {
                    return Result{ .invalid_character = index };
                },
                '"' => return Result.success,
                else => try buf.append(b),
            },
            State.Backslash => switch (b) {
                'n' => {
                    try buf.append('\n');
                    state = State.Start;
                },
                'r' => {
                    try buf.append('\r');
                    state = State.Start;
                },
                '\\' => {
                    try buf.append('\\');
                    state = State.Start;
                },
                't' => {
                    try buf.append('\t');
                    state = State.Start;
                },
                '\'' => {
                    try buf.append('\'');
                    state = State.Start;
                },
                '"' => {
                    try buf.append('"');
                    state = State.Start;
                },
                'x' => {
                    // TODO: add more/better/broader tests for this.
                    // `\x` must be followed by exactly two hex digits.
                    const index_continue = index + 3;
                    if (slice.len < index_continue) {
                        return Result{ .expected_hex_digits = index };
                    }
                    if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |byte| {
                        try buf.append(byte);
                        state = State.Start;
                        index = index_continue - 1; // loop-header increments again
                    } else |err| switch (err) {
                        error.Overflow => unreachable, // 2 digits base 16 fits in a u8.
                        error.InvalidCharacter => {
                            return Result{ .invalid_hex_escape = index + 1 };
                        },
                    }
                },
                'u' => {
                    // TODO: add more/better/broader tests for this.
                    // TODO: we are already inside a nice, clean state machine... use it
                    // instead of this hacky code.
                    if (slice.len > index + 2 and slice[index + 1] == '{') {
                        // Only search the window that allows at most 6 hex digits
                        // between the braces.
                        if (std.mem.indexOfScalarPos(u8, slice[0..std.math.min(index + 9, slice.len)], index + 3, '}')) |index_end| {
                            const hex_str = slice[index + 2 .. index_end];
                            // Non-hex characters and values that do not fit in a
                            // u21 are both invalid escapes.
                            const codepoint = std.fmt.parseUnsigned(u21, hex_str, 16) catch
                                return Result{ .invalid_unicode_escape = index + 1 };
                            // Encoding fails for code points above U+10FFFF and
                            // for surrogate halves.
                            var utf8_buf: [4]u8 = undefined;
                            const utf8_len = std.unicode.utf8Encode(codepoint, utf8_buf[0..]) catch
                                return Result{ .invalid_unicode_escape = index + 1 };
                            try buf.appendSlice(utf8_buf[0..utf8_len]);
                            state = State.Start;
                            index = index_end; // loop-header increments
                        } else {
                            return Result{ .missing_matching_rbrace = index + 1 };
                        }
                    } else {
                        return Result{ .expected_unicode_digits = index };
                    }
                },
                else => {
                    return Result{ .invalid_character = index };
                },
            },
        }
    }

    // The literal ended without an unescaped closing quote. Blame the final
    // byte (the escaped quote), which is in bounds for both `slice` and `bytes`.
    return Result{ .invalid_character = slice.len - 1 };
}
|
|
|
|
/// Convenience wrapper around `parseAppend` that decodes `bytes` into freshly
/// allocated memory. Every unsuccessful `Result` is collapsed into
/// `error.InvalidStringLiteral`, so no position information is reported.
/// Caller owns returned memory.
pub fn parseAlloc(allocator: *std.mem.Allocator, bytes: []const u8) ParseError![]u8 {
    var list = std.ArrayList(u8).init(allocator);
    // Releases the scratch storage on failure; after `toOwnedSlice` the list
    // has already handed its memory to the caller.
    defer list.deinit();

    return switch (try parseAppend(&list, bytes)) {
        .success => list.toOwnedSlice(),
        else => error.InvalidStringLiteral,
    };
}
|
|
|
|
test "parse" {
    const expect = std.testing.expect;
    const expectError = std.testing.expectError;
    const eql = std.mem.eql;

    // Results are never freed, so the fixed buffer only needs to be large
    // enough to hold every allocation made below.
    var fixed_buf_mem: [256]u8 = undefined;
    var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]);
    const alloc = &fixed_buf_alloc.allocator;

    // Plain contents are copied through unchanged.
    try expect(eql(u8, "foo", try parseAlloc(alloc, "\"foo\"")));
    // The compiler decodes `\u{1f4af}` in this literal, so the parser receives
    // raw UTF-8 bytes: this checks that multi-byte characters pass through.
    try expect(eql(u8, "f💯", try parseAlloc(alloc, "\"f\u{1f4af}\"")));

    // Backslashes are doubled here so that the parser under test, not the
    // compiler, is the one that sees and decodes the escape sequences.
    try expect(eql(u8, "foo", try parseAlloc(alloc, "\"f\\x6f\\x6f\"")));
    try expect(eql(u8, "a\n\t\"b\\", try parseAlloc(alloc, "\"a\\n\\t\\\"b\\\\\"")));

    // Malformed escapes are reported as an invalid literal.
    try expectError(error.InvalidStringLiteral, parseAlloc(alloc, "\"\\q\""));
    try expectError(error.InvalidStringLiteral, parseAlloc(alloc, "\"\\xzz\""));
}
|