From 874d2dd9f77b60a7eb6b2af3c34bb02783b0ec85 Mon Sep 17 00:00:00 2001 From: Josh Wolfe Date: Sun, 9 Jul 2023 22:18:59 -0400 Subject: [PATCH] std.json: add generic hash map that parses/stringifies with arbitrary string keys (#16366) * expose innerParseFromValue --- lib/std/json.zig | 4 + lib/std/json/hashmap.zig | 103 +++++++++++++++++++++++++ lib/std/json/hashmap_test.zig | 139 ++++++++++++++++++++++++++++++++++ lib/std/json/static.zig | 45 +++++------ 4 files changed, 266 insertions(+), 25 deletions(-) create mode 100644 lib/std/json/hashmap.zig create mode 100644 lib/std/json/hashmap_test.zig diff --git a/lib/std/json.zig b/lib/std/json.zig index 87603cd4e9..f8480d4207 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -69,6 +69,8 @@ pub const ObjectMap = @import("json/dynamic.zig").ObjectMap; pub const Array = @import("json/dynamic.zig").Array; pub const Value = @import("json/dynamic.zig").Value; +pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap; + pub const validate = @import("json/scanner.zig").validate; pub const Error = @import("json/scanner.zig").Error; pub const reader = @import("json/scanner.zig").reader; @@ -91,6 +93,7 @@ pub const parseFromTokenSourceLeaky = @import("json/static.zig").parseFromTokenS pub const innerParse = @import("json/static.zig").innerParse; pub const parseFromValue = @import("json/static.zig").parseFromValue; pub const parseFromValueLeaky = @import("json/static.zig").parseFromValueLeaky; +pub const innerParseFromValue = @import("json/static.zig").innerParseFromValue; pub const ParseError = @import("json/static.zig").ParseError; pub const ParseFromValueError = @import("json/static.zig").ParseFromValueError; @@ -116,6 +119,7 @@ test { _ = @import("json/scanner.zig"); _ = @import("json/write_stream.zig"); _ = @import("json/dynamic.zig"); + _ = @import("json/hashmap_test.zig"); _ = @import("json/static.zig"); _ = @import("json/stringify.zig"); _ = @import("json/JSONTestSuite_test.zig"); diff --git 
a/lib/std/json/hashmap.zig b/lib/std/json/hashmap.zig
new file mode 100644
index 0000000000..320592589a
--- /dev/null
+++ b/lib/std/json/hashmap.zig
@@ -0,0 +1,122 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+const ParseOptions = @import("static.zig").ParseOptions;
+const innerParse = @import("static.zig").innerParse;
+const innerParseFromValue = @import("static.zig").innerParseFromValue;
+const Value = @import("dynamic.zig").Value;
+const StringifyOptions = @import("stringify.zig").StringifyOptions;
+const stringify = @import("stringify.zig").stringify;
+const encodeJsonString = @import("stringify.zig").encodeJsonString;
+
+/// A thin wrapper around `std.StringArrayHashMapUnmanaged` that implements
+/// `jsonParse`, `jsonParseFromValue`, and `jsonStringify`.
+/// This is useful when your JSON schema has an object with arbitrary data keys
+/// instead of comptime-known struct field names.
+pub fn ArrayHashMap(comptime T: type) type {
+    return struct {
+        map: std.StringArrayHashMapUnmanaged(T) = .{},
+
+        /// Frees the storage owned by the wrapped `map` by calling `map.deinit(allocator)`.
+        /// Individual keys and values are not visited.
+        pub fn deinit(self: *@This(), allocator: Allocator) void {
+            self.map.deinit(allocator);
+        }
+
+        /// Parses a JSON object from `source`, storing each member under its key and
+        /// parsing each member's value with `innerParse(T, ...)`.
+        /// Returns `error.UnexpectedToken` if the next token is not the start of an object.
+        /// A repeated key is resolved according to `options.duplicate_field_behavior`.
+        /// Keys are allocated with `allocator` and stay referenced by the returned map.
+        pub fn jsonParse(allocator: Allocator, source: anytype, options: ParseOptions) !@This() {
+            var map = std.StringArrayHashMapUnmanaged(T){};
+            errdefer map.deinit(allocator);
+
+            if (.object_begin != try source.next()) return error.UnexpectedToken;
+            while (true) {
+                // The map stores the key slice it is given instead of copying it, so
+                // request a key allocated with `allocator` that can outlive the token source.
+                const token = try source.nextAlloc(allocator, .alloc_always);
+                switch (token) {
+                    inline .string, .allocated_string => |k| {
+                        const gop = try map.getOrPut(allocator, k);
+                        if (gop.found_existing) {
+                            if (token == .allocated_string) {
+                                // The entry keeps the key from the first occurrence, so this copy is unused.
+                                // Free it before recursing in case we're using an allocator
+                                // that optimizes freeing the last allocated object.
+                                allocator.free(k);
+                            }
+                            switch (options.duplicate_field_behavior) {
+                                .use_first => {
+                                    // Parse and ignore the redundant value.
+                                    // We don't want to skip the value, because we want type checking.
+                                    _ = try innerParse(T, allocator, source, options);
+                                    continue;
+                                },
+                                .@"error" => return error.DuplicateField,
+                                .use_last => {},
+                            }
+                        }
+                        gop.value_ptr.* = try innerParse(T, allocator, source, options);
+                    },
+                    .object_end => break,
+                    else => unreachable,
+                }
+            }
+            return .{ .map = map };
+        }
+
+        /// Builds a map from an already-parsed `Value`, converting each member's value
+        /// with `innerParseFromValue(T, ...)`.
+        /// Returns `error.UnexpectedToken` if `source` is not an object.
+        /// Keys are the key slices of `source.object`; this function does not copy them.
+        pub fn jsonParseFromValue(allocator: Allocator, source: Value, options: ParseOptions) !@This() {
+            if (source != .object) return error.UnexpectedToken;
+
+            var map = std.StringArrayHashMapUnmanaged(T){};
+            errdefer map.deinit(allocator);
+
+            var it = source.object.iterator();
+            while (it.next()) |kv| {
+                try map.put(allocator, kv.key_ptr.*, try innerParseFromValue(T, allocator, kv.value_ptr.*, options));
+            }
+            return .{ .map = map };
+        }
+
+        /// Writes the map to `out_stream` as a JSON object, in the map's iteration order.
+        /// Keys are written with `encodeJsonString` and values with `stringify`,
+        /// which receives `options` with the indent level raised by one.
+        pub fn jsonStringify(self: @This(), options: StringifyOptions, out_stream: anytype) !void {
+            try out_stream.writeByte('{');
+            var field_output = false;
+            var child_options = options;
+            child_options.whitespace.indent_level += 1;
+            var it = self.map.iterator();
+            while (it.next()) |kv| {
+                if (!field_output) {
+                    field_output = true;
+                } else {
+                    try out_stream.writeByte(',');
+                }
+                try child_options.whitespace.outputIndent(out_stream);
+                try encodeJsonString(kv.key_ptr.*, options, out_stream);
+                try out_stream.writeByte(':');
+                if (child_options.whitespace.separator) {
+                    try out_stream.writeByte(' ');
+                }
+                try stringify(kv.value_ptr.*, child_options, out_stream);
+            }
+            // Only emit the parent-level indent before the closing brace when at least
+            // one member was written, so an empty map is written as "{}".
+            if (field_output) {
+                try options.whitespace.outputIndent(out_stream);
+            }
+            try out_stream.writeByte('}');
+        }
+    };
+}
+
+test {
+    _ = @import("hashmap_test.zig");
+}
diff --git a/lib/std/json/hashmap_test.zig b/lib/std/json/hashmap_test.zig new file mode 100644 index 0000000000..3baead972a --- /dev/null +++ b/lib/std/json/hashmap_test.zig @@ -0,0 +1,139 @@ +const std = @import("std"); +const testing = std.testing; + +const ArrayHashMap = @import("hashmap.zig").ArrayHashMap; + +const parseFromSlice = @import("static.zig").parseFromSlice; +const parseFromSliceLeaky 
= @import("static.zig").parseFromSliceLeaky;
+const parseFromValue = @import("static.zig").parseFromValue;
+const stringifyAlloc = @import("stringify.zig").stringifyAlloc;
+const Value = @import("dynamic.zig").Value;
+
+const T = struct { // Value type stored in the maps under test: one integer field and one string field.
+    i: i32,
+    s: []const u8,
+};
+
+test "parse json hashmap" { // Parses a two-member object from a slice, then checks the entry count and one field of each entry.
+    const doc =
+        \\{
+        \\ "abc": {"i": 0, "s": "d"},
+        \\ "xyz": {"i": 1, "s": "w"}
+        \\}
+    ;
+    const parsed = try parseFromSlice(ArrayHashMap(T), testing.allocator, doc, .{});
+    defer parsed.deinit();
+
+    try testing.expectEqual(@as(usize, 2), parsed.value.map.count());
+    try testing.expectEqualStrings("d", parsed.value.map.get("abc").?.s);
+    try testing.expectEqual(@as(i32, 1), parsed.value.map.get("xyz").?.i);
+}
+
+test "parse json hashmap duplicate fields" { // Runs each `duplicate_field_behavior` against a document that repeats the key "abc".
+    var arena = std.heap.ArenaAllocator.init(std.testing.allocator); // The leaky parse calls below allocate from this arena.
+    defer arena.deinit();
+
+    const doc =
+        \\{
+        \\ "abc": {"i": 0, "s": "d"},
+        \\ "abc": {"i": 1, "s": "w"}
+        \\}
+    ;
+
+    try testing.expectError(error.DuplicateField, parseFromSliceLeaky(ArrayHashMap(T), arena.allocator(), doc, .{ // `.@"error"`: the repeated key is rejected.
+        .duplicate_field_behavior = .@"error",
+    }));
+
+    const first = try parseFromSliceLeaky(ArrayHashMap(T), arena.allocator(), doc, .{ // `.use_first`: one entry remains, holding the first value (i == 0).
+        .duplicate_field_behavior = .use_first,
+    });
+    try testing.expectEqual(@as(usize, 1), first.map.count());
+    try testing.expectEqual(@as(i32, 0), first.map.get("abc").?.i);
+
+    const last = try parseFromSliceLeaky(ArrayHashMap(T), arena.allocator(), doc, .{ // `.use_last`: one entry remains, holding the last value (i == 1).
+        .duplicate_field_behavior = .use_last,
+    });
+    try testing.expectEqual(@as(usize, 1), last.map.count());
+    try testing.expectEqual(@as(i32, 1), last.map.get("abc").?.i);
+}
+
+test "stringify json hashmap" { // Stringifies the map while empty, after two puts, and after each swapRemove.
+    var value = ArrayHashMap(T){};
+    defer value.deinit(testing.allocator);
+    { // Empty map.
+        const doc = try stringifyAlloc(testing.allocator, value, .{});
+        defer testing.allocator.free(doc);
+        try testing.expectEqualStrings("{}", doc);
+    }
+
+    try value.map.put(testing.allocator, "abc", .{ .i = 0, .s = "d" });
+    try value.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" });
+
+    { // Two entries, expected in insertion order.
+        const doc = try stringifyAlloc(testing.allocator, value, .{});
+        defer testing.allocator.free(doc);
+        try testing.expectEqualStrings(
+            \\{"abc":{"i":0,"s":"d"},"xyz":{"i":1,"s":"w"}}
+        , doc);
+    }
+
+    try testing.expect(value.map.swapRemove("abc"));
+    { // Only "xyz" is left.
+        const doc = try stringifyAlloc(testing.allocator, value, .{});
+        defer testing.allocator.free(doc);
+        try testing.expectEqualStrings(
+            \\{"xyz":{"i":1,"s":"w"}}
+        , doc);
+    }
+
+    try testing.expect(value.map.swapRemove("xyz"));
+    { // Empty again.
+        const doc = try stringifyAlloc(testing.allocator, value, .{});
+        defer testing.allocator.free(doc);
+        try testing.expectEqualStrings("{}", doc);
+    }
+}
+
+test "stringify json hashmap whitespace" { // Stringifies two entries with a 2-space indent option and compares against multi-line text.
+    var value = ArrayHashMap(T){};
+    defer value.deinit(testing.allocator);
+    try value.map.put(testing.allocator, "abc", .{ .i = 0, .s = "d" });
+    try value.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" });
+
+    {
+        const doc = try stringifyAlloc(testing.allocator, value, .{
+            .whitespace = .{
+                .indent = .{ .space = 2 },
+            },
+        });
+        defer testing.allocator.free(doc);
+        try testing.expectEqualStrings( // NOTE(review): the leading spaces inside the expected text below look collapsed in this copy of the patch; presumably each nesting level had 2 more spaces - confirm against upstream.
+            \\{
+            \\ "abc": {
+            \\ "i": 0,
+            \\ "s": "d"
+            \\ },
+            \\ "xyz": {
+            \\ "i": 1,
+            \\ "s": "w"
+            \\ }
+            \\}
+        , doc);
+    }
+}
+
+test "json parse from value hashmap" { // Parses the text into a `Value` first, then converts that `Value` into `ArrayHashMap(T)`.
+    const doc =
+        \\{
+        \\ "abc": {"i": 0, "s": "d"},
+        \\ "xyz": {"i": 1, "s": "w"}
+        \\}
+    ;
+    const parsed1 = try parseFromSlice(Value, testing.allocator, doc, .{});
+    defer parsed1.deinit();
+
+    const parsed2 = try parseFromValue(ArrayHashMap(T), testing.allocator, parsed1.value, .{});
+    defer parsed2.deinit();
+
+    try testing.expectEqualStrings("d", parsed2.value.map.get("abc").?.s);
+}
diff --git a/lib/std/json/static.zig b/lib/std/json/static.zig index b48b2a2a13..a46bc33ecd 100644 --- a/lib/std/json/static.zig +++ b/lib/std/json/static.zig @@ -145,6 +145,7 @@ pub fn parseFromTokenSourceLeaky( } /// Like `parseFromSlice`, but the input is an
already-parsed `std.json.Value` object. +/// Only `options.ignore_unknown_fields` is used from `options`. pub fn parseFromValue( comptime T: type, allocator: Allocator, @@ -173,7 +174,7 @@ pub fn parseFromValueLeaky( // I guess this function doesn't need to exist, // but the flow of the sourcecode is easy to follow and grouped nicely with // this pub redirect function near the top and the implementation near the bottom. - return internalParseFromValue(T, allocator, source, options); + return innerParseFromValue(T, allocator, source, options); } /// The error set that will be returned when parsing from `*Source`. @@ -199,7 +200,7 @@ pub const ParseFromValueError = std.fmt.ParseIntError || std.fmt.ParseFloatError /// during the implementation of `parseFromTokenSourceLeaky` and similar. /// It is exposed primarily to enable custom `jsonParse()` methods to call back into the `parseFrom*` system, /// such as if you're implementing a custom container of type `T`; -/// you can call `internalParse(T, ...)` for each of the container's items. +/// you can call `innerParse(T, ...)` for each of the container's items. /// Note that `null` fields are not allowed on the `options` when calling this function. /// (The `options` you get in your `jsonParse` method has no `null` fields.) pub fn innerParse( @@ -528,7 +529,12 @@ fn internalParseArray( return r; } -fn internalParseFromValue( +/// This is an internal function called recursively +/// during the implementation of `parseFromValueLeaky`. +/// It is exposed primarily to enable custom `jsonParseFromValue()` methods to call back into the `parseFromValue*` system, +/// such as if you're implementing a custom container of type `T`; +/// you can call `innerParseFromValue(T, ...)` for each of the container's items. 
+pub fn innerParseFromValue( comptime T: type, allocator: Allocator, source: Value, @@ -571,7 +577,7 @@ fn internalParseFromValue( .Optional => |optionalInfo| { switch (source) { .null => return null, - else => return try internalParseFromValue(optionalInfo.child, allocator, source, options), + else => return try innerParseFromValue(optionalInfo.child, allocator, source, options), } }, .Enum => { @@ -609,7 +615,7 @@ fn internalParseFromValue( return @unionInit(T, u_field.name, {}); } // Recurse. - return @unionInit(T, u_field.name, try internalParseFromValue(u_field.type, allocator, kv.value_ptr.*, options)); + return @unionInit(T, u_field.name, try innerParseFromValue(u_field.type, allocator, kv.value_ptr.*, options)); } } // Didn't match anything. @@ -623,7 +629,7 @@ fn internalParseFromValue( var r: T = undefined; inline for (0..structInfo.fields.len, source.array.items) |i, item| { - r[i] = try internalParseFromValue(structInfo.fields[i].type, allocator, item, options); + r[i] = try innerParseFromValue(structInfo.fields[i].type, allocator, item, options); } return r; @@ -645,19 +651,8 @@ fn internalParseFromValue( inline for (structInfo.fields, 0..) |field, i| { if (field.is_comptime) @compileError("comptime fields are not supported: " ++ @typeName(T) ++ "." ++ field.name); if (std.mem.eql(u8, field.name, field_name)) { - if (fields_seen[i]) { - switch (options.duplicate_field_behavior) { - .use_first => { - // Parse and ignore the redundant value. - // We don't want to skip the value, because we want type checking. - _ = try internalParseFromValue(field.type, allocator, kv.value_ptr.*, options); - break; - }, - .@"error" => return error.DuplicateField, - .use_last => {}, - } - } - @field(r, field.name) = try internalParseFromValue(field.type, allocator, kv.value_ptr.*, options); + assert(!fields_seen[i]); // Can't have duplicate keys in a Value.object. 
+ @field(r, field.name) = try innerParseFromValue(field.type, allocator, kv.value_ptr.*, options); fields_seen[i] = true; break; } @@ -674,7 +669,7 @@ fn internalParseFromValue( switch (source) { .array => |array| { // Typical array. - return internalParseArrayFromArrayValue(T, arrayInfo.child, arrayInfo.len, allocator, array, options); + return innerParseArrayFromArrayValue(T, arrayInfo.child, arrayInfo.len, allocator, array, options); }, .string => |s| { if (arrayInfo.child != u8) return error.UnexpectedToken; @@ -694,7 +689,7 @@ fn internalParseFromValue( .Vector => |vecInfo| { switch (source) { .array => |array| { - return internalParseArrayFromArrayValue(T, vecInfo.child, vecInfo.len, allocator, array, options); + return innerParseArrayFromArrayValue(T, vecInfo.child, vecInfo.len, allocator, array, options); }, else => return error.UnexpectedToken, } @@ -704,7 +699,7 @@ fn internalParseFromValue( switch (ptrInfo.size) { .One => { const r: *ptrInfo.child = try allocator.create(ptrInfo.child); - r.* = try internalParseFromValue(ptrInfo.child, allocator, source, options); + r.* = try innerParseFromValue(ptrInfo.child, allocator, source, options); return r; }, .Slice => { @@ -716,7 +711,7 @@ fn internalParseFromValue( try allocator.alloc(ptrInfo.child, array.items.len); for (array.items, r) |item, *dest| { - dest.* = try internalParseFromValue(ptrInfo.child, allocator, item, options); + dest.* = try innerParseFromValue(ptrInfo.child, allocator, item, options); } return r; @@ -743,7 +738,7 @@ fn internalParseFromValue( } } -fn internalParseArrayFromArrayValue( +fn innerParseArrayFromArrayValue( comptime T: type, comptime Child: type, comptime len: comptime_int, @@ -755,7 +750,7 @@ fn internalParseArrayFromArrayValue( var r: T = undefined; for (array.items, 0..) |item, i| { - r[i] = try internalParseFromValue(Child, allocator, item, options); + r[i] = try innerParseFromValue(Child, allocator, item, options); } return r;