std: add json.parse to automatically decode json into a struct

This commit is contained in:
daurnimator 2019-12-29 17:46:32 +11:00
parent 5a20604820
commit d989396a34
No known key found for this signature in database
GPG Key ID: 45B429A8F9D9D22A

View File

@ -19,6 +19,74 @@ const StringEscapes = union(enum) {
},
};
/// Checks to see if a string matches what it would be as a json-encoded string
/// Assumes that `encoded` is a well-formed json string
fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
var i: usize = 0;
var j: usize = 0;
while (i < decoded.len) {
if (j >= encoded.len) return false;
if (encoded[j] != '\\') {
if (decoded[i] != encoded[j]) return false;
j += 1;
i += 1;
} else {
const escape_type = encoded[j + 1];
if (escape_type != 'u') {
const t: u8 = switch (escape_type) {
'\\' => '\\',
'/' => '/',
'n' => '\n',
'r' => '\r',
't' => '\t',
'f' => 12,
'b' => 8,
'"' => '"',
else => unreachable,
};
if (decoded[i] != t) return false;
j += 2;
i += 1;
} else {
var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
j += 6;
if (codepoint >= 0xD800 and codepoint < 0xDC00) {
// surrogate pair
assert(encoded[j] == '\\');
assert(encoded[j + 1] == 'u');
const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff));
j += 6;
}
var buf: [4]u8 = undefined;
const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable;
if (i + len > decoded.len) return false;
if (!mem.eql(u8, decoded[i .. i + len], buf[0..len])) return false;
i += len;
}
}
}
assert(i == decoded.len);
assert(j == encoded.len);
return true;
}
test "encodesTo" {
// same
testing.expectEqual(true, encodesTo("false", "false"));
// totally different
testing.expectEqual(false, encodesTo("false", "true"));
// differnt lengths
testing.expectEqual(false, encodesTo("false", "other"));
// with escape
testing.expectEqual(true, encodesTo("\\", "\\\\"));
testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape"));
// with unicode
testing.expectEqual(true, encodesTo("ą", "\\u0105"));
testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02"));
testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02"));
}
/// A single token slice into the parent string.
///
/// Use `token.slice()` on the input at the current position to get the current slice.
@ -1201,6 +1269,490 @@ pub const Value = union(enum) {
}
};
pub const ParseOptions = struct {
allocator: ?*Allocator = null,
/// Behaviour when a duplicate field is encountered.
duplicate_field_behavior: enum {
UseFirst,
Error,
UseLast,
} = .Error,
};
fn parseInternal(comptime T: type, token: Token, tokens: *TokenStream, options: ParseOptions) !T {
switch (@typeInfo(T)) {
.Bool => {
return switch (token) {
.True => true,
.False => false,
else => error.UnexpectedToken,
};
},
.Float, .ComptimeFloat => {
const numberToken = switch (token) {
.Number => |n| n,
else => return error.UnexpectedToken,
};
return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1));
},
.Int, .ComptimeInt => {
const numberToken = switch (token) {
.Number => |n| n,
else => return error.UnexpectedToken,
};
if (!numberToken.is_integer) return error.UnexpectedToken;
return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
},
.Optional => |optionalInfo| {
if (token == .Null) {
return null;
} else {
return try parseInternal(optionalInfo.child, token, tokens, options);
}
},
.Enum => |enumInfo| {
switch (token) {
.Number => |numberToken| {
if (!numberToken.is_integer) return error.UnexpectedToken;
const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
return try std.meta.intToEnum(T, n);
},
.String => |stringToken| {
const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
switch (stringToken.escapes) {
.None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
.Some => {
inline for (enumInfo.fields) |field| {
if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
return @field(T, field.name);
}
}
return error.InvalidEnumTag;
},
}
},
else => return error.UnexpectedToken,
}
},
.Union => |unionInfo| {
if (unionInfo.tag_type) |_| {
// try each of the union fields until we find one that matches
inline for (unionInfo.fields) |u_field| {
if (parseInternal(u_field.field_type, token, tokens, options)) |value| {
return @unionInit(T, u_field.name, value);
} else |err| {
// Bubble up error.OutOfMemory
// Parsing some types won't have OutOfMemory in their
// error-sets, for the condition to be valid, merge it in.
if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
// otherwise continue through the `inline for`
}
}
return error.NoUnionMembersMatched;
} else {
@compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
}
},
.Struct => |structInfo| {
switch (token) {
.ObjectBegin => {},
else => return error.UnexpectedToken,
}
var r: T = undefined;
var fields_seen = [_]bool{false} ** structInfo.fields.len;
errdefer {
inline for (structInfo.fields) |field, i| {
if (fields_seen[i]) {
parseFree(field.field_type, @field(r, field.name), options);
}
}
}
while (true) {
switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
.ObjectEnd => break,
.String => |stringToken| {
const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
var found = false;
inline for (structInfo.fields) |field, i| {
// TODO: using switches here segfault the compiler (#2727?)
if ((stringToken.escapes == .None and mem.eql(u8, field.name, key_source_slice)) or (stringToken.escapes == .Some and (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)))) {
// if (switch (stringToken.escapes) {
// .None => mem.eql(u8, field.name, key_source_slice),
// .Some => (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)),
// }) {
if (fields_seen[i]) {
// switch (options.duplicate_field_behavior) {
// .UseFirst => {},
// .Error => {},
// .UseLast => {},
// }
if (options.duplicate_field_behavior == .UseFirst) {
break;
} else if (options.duplicate_field_behavior == .Error) {
return error.DuplicateJSONField;
} else if (options.duplicate_field_behavior == .UseLast) {
parseFree(field.field_type, @field(r, field.name), options);
}
}
@field(r, field.name) = try parse(field.field_type, tokens, options);
fields_seen[i] = true;
found = true;
break;
}
}
if (!found) return error.UnknownField;
},
else => return error.UnexpectedToken,
}
}
inline for (structInfo.fields) |field, i| {
if (!fields_seen[i]) {
if (field.default_value) |default| {
@field(r, field.name) = default;
} else {
return error.MissingField;
}
}
}
return r;
},
.Array => |arrayInfo| {
switch (token) {
.ArrayBegin => {
var r: T = undefined;
var i: usize = 0;
errdefer {
while (true) : (i -= 1) {
parseFree(arrayInfo.child, r[i], options);
if (i == 0) break;
}
}
while (i < r.len) : (i += 1) {
r[i] = try parse(arrayInfo.child, tokens, options);
}
const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
switch (tok) {
.ArrayEnd => {},
else => return error.UnexpectedToken,
}
return r;
},
.String => |stringToken| {
if (arrayInfo.child != u8) return error.UnexpectedToken;
var r: T = undefined;
const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
switch (stringToken.escapes) {
.None => mem.copy(u8, &r, source_slice),
.Some => try unescapeString(&r, source_slice),
}
return r;
},
else => return error.UnexpectedToken,
}
},
.Pointer => |ptrInfo| {
const allocator = options.allocator orelse return error.AllocatorRequired;
switch (ptrInfo.size) {
.One => {
const r: T = allocator.create(ptrInfo.child);
r.* = try parseInternal(ptrInfo.child, token, tokens, options);
return r;
},
.Slice => {
switch (token) {
.ArrayBegin => {
var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
errdefer {
while (arraylist.popOrNull()) |v| {
parseFree(ptrInfo.child, v, options);
}
arraylist.deinit();
}
while (true) {
const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
switch (tok) {
.ArrayEnd => break,
else => {},
}
try arraylist.ensureCapacity(arraylist.len + 1);
const v = try parseInternal(ptrInfo.child, tok, tokens, options);
arraylist.appendAssumeCapacity(v);
}
return arraylist.toOwnedSlice();
},
.String => |stringToken| {
if (ptrInfo.child != u8) return error.UnexpectedToken;
const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
switch (stringToken.escapes) {
.None => return mem.dupe(allocator, u8, source_slice),
.Some => |some_escapes| {
const output = try allocator.alloc(u8, stringToken.decodedLength());
errdefer allocator.free(output);
try unescapeString(output, source_slice);
return output;
},
}
},
else => return error.UnexpectedToken,
}
},
else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
}
},
else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
}
unreachable;
}
pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) !T {
const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
return parseInternal(T, token, tokens, options);
}
/// Releases resources created by `parse`.
/// Should be called with the same type and `ParseOptions` that were passed to `parse`
pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void {
switch (@typeInfo(T)) {
.Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
.Optional => {
if (value) |v| {
return parseFree(@TypeOf(v), v, options);
}
},
.Union => |unionInfo| {
if (unionInfo.tag_type) |UnionTagType| {
inline for (unionInfo.fields) |u_field| {
if (@enumToInt(@as(UnionTagType, value)) == u_field.enum_field.?.value) {
parseFree(u_field.field_type, @field(value, u_field.name), options);
break;
}
}
} else {
unreachable;
}
},
.Struct => |structInfo| {
inline for (structInfo.fields) |field| {
parseFree(field.field_type, @field(value, field.name), options);
}
},
.Array => |arrayInfo| {
for (value) |v| {
parseFree(arrayInfo.child, v, options);
}
},
.Pointer => |ptrInfo| {
const allocator = options.allocator orelse unreachable;
switch (ptrInfo.size) {
.One => {
parseFree(ptrInfo.child, value.*, options);
allocator.destroy(v);
},
.Slice => {
for (value) |v| {
parseFree(ptrInfo.child, v, options);
}
allocator.free(value);
},
else => unreachable,
}
},
else => unreachable,
}
}
test "parse" {
testing.expectEqual(false, try parse(bool, &TokenStream.init("false"), ParseOptions{}));
testing.expectEqual(true, try parse(bool, &TokenStream.init("true"), ParseOptions{}));
testing.expectEqual(@as(u1, 1), try parse(u1, &TokenStream.init("1"), ParseOptions{}));
testing.expectError(error.Overflow, parse(u1, &TokenStream.init("50"), ParseOptions{}));
testing.expectEqual(@as(u64, 42), try parse(u64, &TokenStream.init("42"), ParseOptions{}));
testing.expectEqual(@as(f64, 42), try parse(f64, &TokenStream.init("42.0"), ParseOptions{}));
testing.expectEqual(@as(?bool, null), try parse(?bool, &TokenStream.init("null"), ParseOptions{}));
testing.expectEqual(@as(?bool, true), try parse(?bool, &TokenStream.init("true"), ParseOptions{}));
testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("\"foo\""), ParseOptions{}));
testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("[102, 111, 111]"), ParseOptions{}));
}
test "parse into enum" {
const T = extern enum {
Foo = 42,
Bar,
@"with\\escape",
};
testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("\"Foo\""), ParseOptions{}));
testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("42"), ParseOptions{}));
testing.expectEqual(@as(T, .@"with\\escape"), try parse(T, &TokenStream.init("\"with\\\\escape\""), ParseOptions{}));
testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("5"), ParseOptions{}));
testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("\"Qux\""), ParseOptions{}));
}
test "parse into that allocates a slice" {
testing.expectError(error.AllocatorRequired, parse([]u8, &TokenStream.init("\"foo\""), ParseOptions{}));
const options = ParseOptions{ .allocator = testing.allocator };
{
const r = try parse([]u8, &TokenStream.init("\"foo\""), options);
defer parseFree([]u8, r, options);
testing.expectEqualSlices(u8, "foo", r);
}
{
const r = try parse([]u8, &TokenStream.init("[102, 111, 111]"), options);
defer parseFree([]u8, r, options);
testing.expectEqualSlices(u8, "foo", r);
}
{
const r = try parse([]u8, &TokenStream.init("\"with\\\\escape\""), options);
defer parseFree([]u8, r, options);
testing.expectEqualSlices(u8, "with\\escape", r);
}
}
test "parse into tagged union" {
{
const T = union(enum) {
int: i32,
float: f64,
string: []const u8,
};
testing.expectEqual(T{ .float = 1.5 }, try parse(T, &TokenStream.init("1.5"), ParseOptions{}));
}
{ // if union matches string member, fails with NoUnionMembersMatched rather than AllocatorRequired
// Note that this behaviour wasn't necessarily by design, but was
// what fell out of the implementation and may result in interesting
// API breakage if changed
const T = union(enum) {
int: i32,
float: f64,
string: []const u8,
};
testing.expectError(error.NoUnionMembersMatched, parse(T, &TokenStream.init("\"foo\""), ParseOptions{}));
}
{ // failing allocations should be bubbled up instantly without trying next member
var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0);
const options = ParseOptions{ .allocator = &fail_alloc.allocator };
const T = union(enum) {
// both fields here match the input
string: []const u8,
array: [3]u8,
};
testing.expectError(error.OutOfMemory, parse(T, &TokenStream.init("[1,2,3]"), options));
}
{
// if multiple matches possible, takes first option
const T = union(enum) {
x: u8,
y: u8,
};
testing.expectEqual(T{ .x = 42 }, try parse(T, &TokenStream.init("42"), ParseOptions{}));
}
}
test "parseFree descends into tagged union" {
var fail_alloc = testing.FailingAllocator.init(testing.allocator, 1);
const options = ParseOptions{ .allocator = &fail_alloc.allocator };
const T = union(enum) {
int: i32,
float: f64,
string: []const u8,
};
// use a string with unicode escape so we know result can't be a reference to global constant
const r = try parse(T, &TokenStream.init("\"with\\u0105unicode\""), options);
testing.expectEqual(@TagType(T).string, @as(@TagType(T), r));
testing.expectEqualSlices(u8, "withąunicode", r.string);
testing.expectEqual(@as(usize, 0), fail_alloc.deallocations);
parseFree(T, r, options);
testing.expectEqual(@as(usize, 1), fail_alloc.deallocations);
}
test "parse into struct with no fields" {
const T = struct {};
testing.expectEqual(T{}, try parse(T, &TokenStream.init("{}"), ParseOptions{}));
}
test "parse into struct with misc fields" {
@setEvalBranchQuota(10000);
const options = ParseOptions{ .allocator = testing.allocator };
const T = struct {
int: i64,
float: f64,
@"with\\escape": bool,
@"withąunicode😂": bool,
language: []const u8,
optional: ?bool,
default_field: i32 = 42,
static_array: [3]f64,
dynamic_array: []f64,
const Bar = struct {
nested: []const u8,
};
complex: Bar,
const Baz = struct {
foo: []const u8,
};
veryComplex: []Baz,
const Union = union(enum) {
x: u8,
float: f64,
string: []const u8,
};
a_union: Union,
};
const r = try parse(T, &TokenStream.init(
\\{
\\ "int": 420,
\\ "float": 3.14,
\\ "with\\escape": true,
\\ "with\u0105unicode\ud83d\ude02": false,
\\ "language": "zig",
\\ "optional": null,
\\ "static_array": [66.6, 420.420, 69.69],
\\ "dynamic_array": [66.6, 420.420, 69.69],
\\ "complex": {
\\ "nested": "zig"
\\ },
\\ "veryComplex": [
\\ {
\\ "foo": "zig"
\\ }, {
\\ "foo": "rocks"
\\ }
\\ ],
\\ "a_union": 100000
\\}
), options);
defer parseFree(T, r, options);
testing.expectEqual(@as(i64, 420), r.int);
testing.expectEqual(@as(f64, 3.14), r.float);
testing.expectEqual(true, r.@"with\\escape");
testing.expectEqual(false, r.@"withąunicode😂");
testing.expectEqualSlices(u8, "zig", r.language);
testing.expectEqual(@as(?bool, null), r.optional);
testing.expectEqual(@as(i32, 42), r.default_field);
testing.expectEqual(@as(f64, 66.6), r.static_array[0]);
testing.expectEqual(@as(f64, 420.420), r.static_array[1]);
testing.expectEqual(@as(f64, 69.69), r.static_array[2]);
testing.expectEqual(@as(usize, 3), r.dynamic_array.len);
testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]);
testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]);
testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]);
testing.expectEqualSlices(u8, r.complex.nested, "zig");
testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo);
testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo);
testing.expectEqual(T.Union{ .float = 100000 }, r.a_union);
}
/// A non-stream JSON parser which constructs a tree of Value's.
pub const Parser = struct {
allocator: *Allocator,