diff --git a/lib/std/json.zig b/lib/std/json.zig index c0fb064c6a..f81ac1cd65 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -10,8 +10,8 @@ //! The high-level `stringify` serializes a Zig or `Value` type into JSON. const builtin = @import("builtin"); -const testing = @import("std").testing; -const ArrayList = @import("std").ArrayList; +const std = @import("std"); +const testing = std.testing; test Scanner { var scanner = Scanner.initCompleteInput(testing.allocator, "{\"foo\": 123}\n"); @@ -41,11 +41,13 @@ test Value { try testing.expectEqualSlices(u8, "goes", parsed.value.object.get("anything").?.string); } -test writeStream { - var out = ArrayList(u8).init(testing.allocator); +test Stringify { + var out: std.io.Writer.Allocating = .init(testing.allocator); + var write_stream: Stringify = .{ + .writer = &out.writer, + .options = .{ .whitespace = .indent_2 }, + }; defer out.deinit(); - var write_stream = writeStream(out.writer(), .{ .whitespace = .indent_2 }); - defer write_stream.deinit(); try write_stream.beginObject(); try write_stream.objectField("foo"); try write_stream.write(123); @@ -55,16 +57,7 @@ test writeStream { \\ "foo": 123 \\} ; - try testing.expectEqualSlices(u8, expected, out.items); -} - -test stringify { - var out = ArrayList(u8).init(testing.allocator); - defer out.deinit(); - - const T = struct { a: i32, b: []const u8 }; - try stringify(T{ .a = 123, .b = "xy" }, .{}, out.writer()); - try testing.expectEqualSlices(u8, "{\"a\":123,\"b\":\"xy\"}", out.items); + try testing.expectEqualSlices(u8, expected, out.getWritten()); } pub const ObjectMap = @import("json/dynamic.zig").ObjectMap; @@ -73,18 +66,18 @@ pub const Value = @import("json/dynamic.zig").Value; pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap; -pub const validate = @import("json/scanner.zig").validate; -pub const Error = @import("json/scanner.zig").Error; -pub const reader = @import("json/scanner.zig").reader; -pub const default_buffer_size = 
@import("json/scanner.zig").default_buffer_size; -pub const Token = @import("json/scanner.zig").Token; -pub const TokenType = @import("json/scanner.zig").TokenType; -pub const Diagnostics = @import("json/scanner.zig").Diagnostics; -pub const AllocWhen = @import("json/scanner.zig").AllocWhen; -pub const default_max_value_len = @import("json/scanner.zig").default_max_value_len; -pub const Reader = @import("json/scanner.zig").Reader; -pub const Scanner = @import("json/scanner.zig").Scanner; -pub const isNumberFormattedLikeAnInteger = @import("json/scanner.zig").isNumberFormattedLikeAnInteger; +pub const Scanner = @import("json/Scanner.zig"); +pub const validate = Scanner.validate; +pub const Error = Scanner.Error; +pub const reader = Scanner.reader; +pub const default_buffer_size = Scanner.default_buffer_size; +pub const Token = Scanner.Token; +pub const TokenType = Scanner.TokenType; +pub const Diagnostics = Scanner.Diagnostics; +pub const AllocWhen = Scanner.AllocWhen; +pub const default_max_value_len = Scanner.default_max_value_len; +pub const Reader = Scanner.Reader; +pub const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger; pub const ParseOptions = @import("json/static.zig").ParseOptions; pub const Parsed = @import("json/static.zig").Parsed; @@ -99,27 +92,49 @@ pub const innerParseFromValue = @import("json/static.zig").innerParseFromValue; pub const ParseError = @import("json/static.zig").ParseError; pub const ParseFromValueError = @import("json/static.zig").ParseFromValueError; -pub const StringifyOptions = @import("json/stringify.zig").StringifyOptions; -pub const stringify = @import("json/stringify.zig").stringify; -pub const stringifyMaxDepth = @import("json/stringify.zig").stringifyMaxDepth; -pub const stringifyArbitraryDepth = @import("json/stringify.zig").stringifyArbitraryDepth; -pub const stringifyAlloc = @import("json/stringify.zig").stringifyAlloc; -pub const writeStream = @import("json/stringify.zig").writeStream; -pub const 
writeStreamMaxDepth = @import("json/stringify.zig").writeStreamMaxDepth; -pub const writeStreamArbitraryDepth = @import("json/stringify.zig").writeStreamArbitraryDepth; -pub const WriteStream = @import("json/stringify.zig").WriteStream; -pub const encodeJsonString = @import("json/stringify.zig").encodeJsonString; -pub const encodeJsonStringChars = @import("json/stringify.zig").encodeJsonStringChars; +pub const Stringify = @import("json/Stringify.zig"); -pub const Formatter = @import("json/fmt.zig").Formatter; -pub const fmt = @import("json/fmt.zig").fmt; +/// Returns a formatter that formats the given value using stringify. +pub fn fmt(value: anytype, options: Stringify.Options) Formatter(@TypeOf(value)) { + return Formatter(@TypeOf(value)){ .value = value, .options = options }; +} + +test fmt { + const expectFmt = std.testing.expectFmt; + try expectFmt("123", "{f}", .{fmt(@as(u32, 123), .{})}); + try expectFmt( + \\{"num":927,"msg":"hello","sub":{"mybool":true}} + , "{f}", .{fmt(struct { + num: u32, + msg: []const u8, + sub: struct { + mybool: bool, + }, + }{ + .num = 927, + .msg = "hello", + .sub = .{ .mybool = true }, + }, .{})}); +} + +/// Formats the given value using stringify. +pub fn Formatter(comptime T: type) type { + return struct { + value: T, + options: Stringify.Options, + + pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void { + try Stringify.value(self.value, self.options, writer); + } + }; +} test { _ = @import("json/test.zig"); - _ = @import("json/scanner.zig"); + _ = Scanner; _ = @import("json/dynamic.zig"); _ = @import("json/hashmap.zig"); _ = @import("json/static.zig"); - _ = @import("json/stringify.zig"); + _ = Stringify; _ = @import("json/JSONTestSuite_test.zig"); } diff --git a/lib/std/json/Scanner.zig b/lib/std/json/Scanner.zig new file mode 100644 index 0000000000..b9c3c506a5 --- /dev/null +++ b/lib/std/json/Scanner.zig @@ -0,0 +1,1767 @@ +//! The lowest level parsing API in this package; +//! 
supports streaming input with a low memory footprint. +//! The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input. +//! Specifically `d/8` bytes are required for this purpose, +//! with some extra buffer according to the implementation of `std.ArrayList`. +//! +//! This scanner can emit partial tokens; see `std.json.Token`. +//! The input to this class is a sequence of input buffers that you must supply one at a time. +//! Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned. +//! Then call `feedInput()` again and so forth. +//! Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`, +//! or when `error.BufferUnderrun` requests more data and there is no more. +//! Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned. +//! +//! Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259 +//! * RFC 8259 requires JSON documents be valid UTF-8, +//! but makes an allowance for systems that are "part of a closed ecosystem". +//! I have no idea what that's supposed to mean in the context of a standard specification. +//! This implementation requires inputs to be valid UTF-8. +//! * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits, +//! but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed. +//! (RFC 5234 defines HEXDIG to only allow uppercase.) +//! * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value". +//! See http://www.unicode.org/glossary/#unicode_scalar_value . +//! * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences, +//! but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?), +//! 
which would mean that unpaired surrogate halves are forbidden. +//! By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to) +//! explicitly allows unpaired surrogate halves. +//! This implementation forbids unpaired surrogate halves in \u sequences. +//! If a high surrogate half appears in a \u sequence, +//! then a low surrogate half must immediately follow in \u notation. +//! * RFC 8259 allows implementations to "accept non-JSON forms or extensions". +//! This implementation does not accept any of that. +//! * RFC 8259 allows implementations to put limits on "the size of texts", +//! "the maximum depth of nesting", "the range and precision of numbers", +//! and "the length and character contents of strings". +//! This low-level implementation does not limit these, +//! except where noted above, and except that nesting depth requires memory allocation. +//! Note that this low-level API does not interpret numbers numerically, +//! but simply emits their source form for some higher level code to make sense of. +//! * This low-level implementation allows duplicate object keys, +//! and key/value pairs are emitted in the order they appear in the input. + +const Scanner = @This(); +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const assert = std.debug.assert; +const BitStack = std.BitStack; + +state: State = .value, +string_is_object_key: bool = false, +stack: BitStack, +value_start: usize = undefined, +utf16_code_units: [2]u16 = undefined, + +input: []const u8 = "", +cursor: usize = 0, +is_end_of_input: bool = false, +diagnostics: ?*Diagnostics = null, + +/// The allocator is only used to track `[]` and `{}` nesting levels. +pub fn initStreaming(allocator: Allocator) @This() { + return .{ + .stack = BitStack.init(allocator), + }; +} +/// Use this if your input is a single slice. 
/// This is effectively equivalent to:
/// ```
/// initStreaming(allocator);
/// feedInput(complete_input);
/// endInput();
/// ```
/// The returned scanner starts with `complete_input` as its only buffer and
/// with the end-of-input flag already set.
pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() {
    return @This(){
        .is_end_of_input = true,
        .input = complete_input,
        .stack = BitStack.init(allocator),
    };
}

/// Releases the nesting stack and then poisons the scanner by overwriting it
/// with `undefined`, so it must not be used afterwards.
pub fn deinit(self: *@This()) void {
    defer self.* = undefined;
    self.stack.deinit();
}

/// Attaches `diagnostics` to this scanner.
/// `diagnostics` is given a pointer to this scanner's `cursor` field, so the
/// scanner has to stay at the same address for as long as `diagnostics` is used.
pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
    self.diagnostics = diagnostics;
    diagnostics.cursor_pointer = &self.cursor;
}

/// Supplies the next input buffer; call this after `next()` has returned
/// `error.BufferUnderrun`. Once no more input exists, call `endInput()` instead.
/// The previous buffer must have been consumed completely.
pub fn feedInput(self: *@This(), input: []const u8) void {
    const consumed = self.cursor;
    assert(consumed == self.input.len); // Not done with the last input slice.

    if (self.diagnostics) |diag| {
        // Account for the buffer that is being retired.
        diag.total_bytes_before_current_input += self.input.len;
        // Wrapping subtraction on purpose: the result usually goes "negative",
        // expressing how far before the start of the new buffer the current
        // line began.
        diag.line_start_cursor -%= consumed;
    }

    // Restart all positions at the beginning of the new buffer.
    self.cursor = 0;
    self.value_start = 0;
    self.input = input;
}
/// Call this when you will no longer call `feedInput()` anymore.
/// This can be called either immediately after the last `feedInput()`,
/// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`.
/// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`.
pub fn endInput(self: *@This()) void {
    // From here on, running out of buffered bytes means the document is over
    // rather than "more data is coming".
    self.is_end_of_input = true;
}

/// Errors returned by `next()` and `skipUntilStackHeight()`.
pub const NextError = Error || Allocator.Error || error{BufferUnderrun};
/// Errors returned by `nextAlloc()` and `nextAllocMax()`.
pub const AllocError = Error || Allocator.Error || error{ValueTooLong};
pub const PeekError = Error || error{BufferUnderrun};
/// Errors returned by `skipValue()`.
pub const SkipError = Error || Allocator.Error;
/// Errors returned by `allocNextIntoArrayList()` and `allocNextIntoArrayListMax()`.
pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun};

/// Shorthand for `nextAllocMax(allocator, when, default_max_value_len);`
/// Only usable once `endInput()` (or `initCompleteInput()`) has been called.
/// `std.json.Token` documents how the `nextAlloc*()` functions behave.
pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
    const limit = default_max_value_len;
    return self.nextAllocMax(allocator, when, limit);
}

/// Only usable once `endInput()` (or `initCompleteInput()`) has been called.
/// `std.json.Token` documents how the `nextAlloc*()` functions behave.
/// Numbers and strings come back either as a slice of the input (`.number` / `.string`)
/// or as memory owned by the caller (`.allocated_number` / `.allocated_string`);
/// every other token is forwarded from `next()` unchanged.
pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
    assert(self.is_end_of_input); // This function is not available in streaming mode.

    // All `error.BufferUnderrun` cases below are treated as impossible,
    // because the assertion above requires the input to be complete.
    const kind = self.peekNextTokenType() catch |e| switch (e) {
        error.BufferUnderrun => unreachable,
        else => |err| return err,
    };

    switch (kind) {
        // Handled after this switch.
        .number, .string => {},

        // Simple tokens never alloc.
        .object_begin,
        .object_end,
        .array_begin,
        .array_end,
        .true,
        .false,
        .null,
        .end_of_document,
        => return self.next() catch |e| switch (e) {
            error.BufferUnderrun => unreachable,
            else => |err| return err,
        },
    }

    var buffer = ArrayList(u8).init(allocator);
    errdefer buffer.deinit();

    const borrowed = self.allocNextIntoArrayListMax(&buffer, when, max_value_len) catch |e| switch (e) {
        error.BufferUnderrun => unreachable,
        else => |err| return err,
    };

    if (borrowed) |slice| {
        // The value was returned as a slice of the input; `buffer` was not used.
        return if (kind == .number) Token{ .number = slice } else Token{ .string = slice };
    }

    // The value was accumulated in `buffer`; hand its memory to the caller.
    const owned = try buffer.toOwnedSlice();
    return if (kind == .number)
        Token{ .allocated_number = owned }
    else
        Token{ .allocated_string = owned };
}

/// Shorthand for `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 {
    const limit = default_max_value_len;
    return self.allocNextIntoArrayListMax(value_list, when, limit);
}
/// Reads one complete `.number` or `.string` value; the caller must make sure
/// (see `peekNextTokenType()`) that one of those is actually next.
/// With `.alloc_if_needed`, if nothing has been accumulated in `value_list` when the
/// final token arrives, that token's input slice is returned and `value_list` is left alone.
/// Otherwise every partial token and the final token are appended to `value_list`
/// and `null` is returned once the value is complete.
/// `value_list` is never cleared here, so after `error.BufferUnderrun` the pieces
/// gathered so far remain in it and the call can be resumed with the same list.
/// The return value does not say whether the value was a number or a string;
/// the caller is expected to know that already.
pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 {
    while (true) {
        switch (try self.next()) {
            // Pieces of a value that is not finished yet: collect and keep going.
            .partial_number, .partial_string => |piece| try appendSlice(value_list, piece, max_value_len),
            .partial_string_escaped_1 => |bytes| try appendSlice(value_list, &bytes, max_value_len),
            .partial_string_escaped_2 => |bytes| try appendSlice(value_list, &bytes, max_value_len),
            .partial_string_escaped_3 => |bytes| try appendSlice(value_list, &bytes, max_value_len),
            .partial_string_escaped_4 => |bytes| try appendSlice(value_list, &bytes, max_value_len),

            // Final piece of the value.
            .number, .string => |slice| {
                const nothing_collected = value_list.items.len == 0;
                if (when == .alloc_if_needed and nothing_collected) {
                    // No alloc necessary.
                    return slice;
                }
                try appendSlice(value_list, slice, max_value_len);
                // The token is complete.
                return null;
            },

            // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this.
            .object_begin,
            .object_end,
            .array_begin,
            .array_end,
            .true,
            .false,
            .null,
            .end_of_document,
            => unreachable,

            .allocated_number, .allocated_string => unreachable,
        }
    }
}

/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
/// If the next token type is `.object_begin` or `.array_begin`,
/// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found.
/// If the next token type is `.number` or `.string`,
/// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found.
/// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once.
/// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`;
/// see `peekNextTokenType()`.
pub fn skipValue(self: *@This()) SkipError!void {
    assert(self.is_end_of_input); // This function is not available in streaming mode.

    // Every `error.BufferUnderrun` below is treated as impossible, because the
    // assertion above requires the input to be complete.
    const upcoming = self.peekNextTokenType() catch |e| switch (e) {
        error.BufferUnderrun => unreachable,
        else => |err| return err,
    };

    switch (upcoming) {
        .object_begin, .array_begin => {
            // The opening token has only been peeked, so the current height is
            // the height the scanner returns to when the container is closed.
            const height_before = self.stackHeight();
            self.skipUntilStackHeight(height_before) catch |e| switch (e) {
                error.BufferUnderrun => unreachable,
                else => |err| return err,
            };
        },

        .number, .string => while (true) {
            const token = self.next() catch |e| switch (e) {
                error.BufferUnderrun => unreachable,
                else => |err| return err,
            };
            switch (token) {
                // The complete value has been consumed.
                .number, .string => return,

                // Still inside the value; keep reading.
                .partial_number,
                .partial_string,
                .partial_string_escaped_1,
                .partial_string_escaped_2,
                .partial_string_escaped_3,
                .partial_string_escaped_4,
                => {},

                else => unreachable,
            }
        },

        .true, .false, .null => {
            _ = self.next() catch |e| switch (e) {
                error.BufferUnderrun => unreachable,
                else => |err| return err,
            };
        },

        // Attempt to skip a non-value token.
        .object_end, .array_end, .end_of_document => unreachable,
    }
}

/// Consumes tokens until an `.object_end` or `.array_end` token leaves `stackHeight()`
/// equal to `terminal_stack_height`.
/// Unlike `skipValue()`, this function is available in streaming mode.
pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void {
    while (true) {
        const token = try self.next();
        const closed_container = switch (token) {
            .object_end, .array_end => true,
            .end_of_document => unreachable,
            else => false,
        };
        if (closed_container and self.stackHeight() == terminal_stack_height) return;
    }
}

/// Number of `{}` / `[]` containers that are open at the current position.
pub fn stackHeight(self: *const @This()) usize {
    const depth: usize = self.stack.bit_len;
    return depth;
}

/// Pre allocate memory to hold the given number of nesting levels.
/// `stackHeight()` up to the given number will not cause allocations.
+pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void { + try self.stack.ensureTotalCapacity(height); +} + +/// See `std.json.Token` for documentation of this function. +pub fn next(self: *@This()) NextError!Token { + state_loop: while (true) { + switch (self.state) { + .value => { + switch (try self.skipWhitespaceExpectByte()) { + // Object, Array + '{' => { + try self.stack.push(OBJECT_MODE); + self.cursor += 1; + self.state = .object_start; + return .object_begin; + }, + '[' => { + try self.stack.push(ARRAY_MODE); + self.cursor += 1; + self.state = .array_start; + return .array_begin; + }, + + // String + '"' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + continue :state_loop; + }, + + // Number + '1'...'9' => { + self.value_start = self.cursor; + self.cursor += 1; + self.state = .number_int; + continue :state_loop; + }, + '0' => { + self.value_start = self.cursor; + self.cursor += 1; + self.state = .number_leading_zero; + continue :state_loop; + }, + '-' => { + self.value_start = self.cursor; + self.cursor += 1; + self.state = .number_minus; + continue :state_loop; + }, + + // literal values + 't' => { + self.cursor += 1; + self.state = .literal_t; + continue :state_loop; + }, + 'f' => { + self.cursor += 1; + self.state = .literal_f; + continue :state_loop; + }, + 'n' => { + self.cursor += 1; + self.state = .literal_n; + continue :state_loop; + }, + + else => return error.SyntaxError, + } + }, + + .post_value => { + if (try self.skipWhitespaceCheckEnd()) return .end_of_document; + + const c = self.input[self.cursor]; + if (self.string_is_object_key) { + self.string_is_object_key = false; + switch (c) { + ':' => { + self.cursor += 1; + self.state = .value; + continue :state_loop; + }, + else => return error.SyntaxError, + } + } + + switch (c) { + '}' => { + if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError; + self.cursor += 1; + // stay in .post_value state. 
+ return .object_end; + }, + ']' => { + if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError; + self.cursor += 1; + // stay in .post_value state. + return .array_end; + }, + ',' => { + switch (self.stack.peek()) { + OBJECT_MODE => { + self.state = .object_post_comma; + }, + ARRAY_MODE => { + self.state = .value; + }, + } + self.cursor += 1; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + + .object_start => { + switch (try self.skipWhitespaceExpectByte()) { + '"' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + self.string_is_object_key = true; + continue :state_loop; + }, + '}' => { + self.cursor += 1; + _ = self.stack.pop(); + self.state = .post_value; + return .object_end; + }, + else => return error.SyntaxError, + } + }, + .object_post_comma => { + switch (try self.skipWhitespaceExpectByte()) { + '"' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + self.string_is_object_key = true; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + + .array_start => { + switch (try self.skipWhitespaceExpectByte()) { + ']' => { + self.cursor += 1; + _ = self.stack.pop(); + self.state = .post_value; + return .array_end; + }, + else => { + self.state = .value; + continue :state_loop; + }, + } + }, + + .number_minus => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (self.input[self.cursor]) { + '0' => { + self.cursor += 1; + self.state = .number_leading_zero; + continue :state_loop; + }, + '1'...'9' => { + self.cursor += 1; + self.state = .number_int; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_leading_zero => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true); + switch (self.input[self.cursor]) { + '.' 
=> { + self.cursor += 1; + self.state = .number_post_dot; + continue :state_loop; + }, + 'e', 'E' => { + self.cursor += 1; + self.state = .number_post_e; + continue :state_loop; + }, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + }, + .number_int => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + '0'...'9' => continue, + '.' => { + self.cursor += 1; + self.state = .number_post_dot; + continue :state_loop; + }, + 'e', 'E' => { + self.cursor += 1; + self.state = .number_post_e; + continue :state_loop; + }, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + } + return self.endOfBufferInNumber(true); + }, + .number_post_dot => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (self.input[self.cursor]) { + '0'...'9' => { + self.cursor += 1; + self.state = .number_frac; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_frac => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + '0'...'9' => continue, + 'e', 'E' => { + self.cursor += 1; + self.state = .number_post_e; + continue :state_loop; + }, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + } + return self.endOfBufferInNumber(true); + }, + .number_post_e => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (self.input[self.cursor]) { + '0'...'9' => { + self.cursor += 1; + self.state = .number_exp; + continue :state_loop; + }, + '+', '-' => { + self.cursor += 1; + self.state = .number_post_e_sign; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_post_e_sign => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (self.input[self.cursor]) { + '0'...'9' => { + self.cursor += 1; + 
self.state = .number_exp; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_exp => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + '0'...'9' => continue, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + } + return self.endOfBufferInNumber(true); + }, + + .string => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string. + + // ASCII plain text. + 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue, + + // Special characters. + '"' => { + const result = Token{ .string = self.takeValueSlice() }; + self.cursor += 1; + self.state = .post_value; + return result; + }, + '\\' => { + const slice = self.takeValueSlice(); + self.cursor += 1; + self.state = .string_backslash; + if (slice.len > 0) return Token{ .partial_string = slice }; + continue :state_loop; + }, + + // UTF-8 validation. 
+ // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String + 0xC2...0xDF => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + 0xE0 => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte_guard_against_overlong; + continue :state_loop; + }, + 0xE1...0xEC, 0xEE...0xEF => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + 0xED => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half; + continue :state_loop; + }, + 0xF0 => { + self.cursor += 1; + self.state = .string_utf8_third_to_last_byte_guard_against_overlong; + continue :state_loop; + }, + 0xF1...0xF3 => { + self.cursor += 1; + self.state = .string_utf8_third_to_last_byte; + continue :state_loop; + }, + 0xF4 => { + self.cursor += 1; + self.state = .string_utf8_third_to_last_byte_guard_against_too_large; + continue :state_loop; + }, + 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8. + } + } + if (self.is_end_of_input) return error.UnexpectedEndOfInput; + const slice = self.takeValueSlice(); + if (slice.len > 0) return Token{ .partial_string = slice }; + return error.BufferUnderrun; + }, + .string_backslash => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + '"', '\\', '/' => { + // Since these characters now represent themselves literally, + // we can simply begin the next plaintext slice here. 
+ self.value_start = self.cursor; + self.cursor += 1; + self.state = .string; + continue :state_loop; + }, + 'b' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{0x08} }; + }, + 'f' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{0x0c} }; + }, + 'n' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{'\n'} }; + }, + 'r' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{'\r'} }; + }, + 't' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{'\t'} }; + }, + 'u' => { + self.cursor += 1; + self.state = .string_backslash_u; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .string_backslash_u => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + const c = self.input[self.cursor]; + switch (c) { + '0'...'9' => { + self.utf16_code_units[0] = @as(u16, c - '0') << 12; + }, + 'A'...'F' => { + self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12; + }, + 'a'...'f' => { + self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.state = .string_backslash_u_1; + continue :state_loop; + }, + .string_backslash_u_1 => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + const c = self.input[self.cursor]; + switch (c) { + '0'...'9' => { + self.utf16_code_units[0] |= @as(u16, c - '0') << 8; + }, + 'A'...'F' => { + self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8; + }, + 'a'...'f' => { + self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.state = 
.string_backslash_u_2; + continue :state_loop; + }, + .string_backslash_u_2 => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + const c = self.input[self.cursor]; + switch (c) { + '0'...'9' => { + self.utf16_code_units[0] |= @as(u16, c - '0') << 4; + }, + 'A'...'F' => { + self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4; + }, + 'a'...'f' => { + self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.state = .string_backslash_u_3; + continue :state_loop; + }, + .string_backslash_u_3 => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + const c = self.input[self.cursor]; + switch (c) { + '0'...'9' => { + self.utf16_code_units[0] |= c - '0'; + }, + 'A'...'F' => { + self.utf16_code_units[0] |= c - 'A' + 10; + }, + 'a'...'f' => { + self.utf16_code_units[0] |= c - 'a' + 10; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) { + self.state = .string_surrogate_half; + continue :state_loop; + } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) { + return error.SyntaxError; // Unexpected low surrogate half. + } else { + self.value_start = self.cursor; + self.state = .string; + return partialStringCodepoint(self.utf16_code_units[0]); + } + }, + .string_surrogate_half => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + '\\' => { + self.cursor += 1; + self.state = .string_surrogate_half_backslash; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. 
+ } + }, + .string_surrogate_half_backslash => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 'u' => { + self.cursor += 1; + self.state = .string_surrogate_half_backslash_u; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. + } + }, + .string_surrogate_half_backslash_u => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 'D', 'd' => { + self.cursor += 1; + self.utf16_code_units[1] = 0xD << 12; + self.state = .string_surrogate_half_backslash_u_1; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. + } + }, + .string_surrogate_half_backslash_u_1 => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + const c = self.input[self.cursor]; + switch (c) { + 'C'...'F' => { + self.cursor += 1; + self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8; + self.state = .string_surrogate_half_backslash_u_2; + continue :state_loop; + }, + 'c'...'f' => { + self.cursor += 1; + self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8; + self.state = .string_surrogate_half_backslash_u_2; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. 
+ } + }, + .string_surrogate_half_backslash_u_2 => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + const c = self.input[self.cursor]; + switch (c) { + '0'...'9' => { + self.cursor += 1; + self.utf16_code_units[1] |= @as(u16, c - '0') << 4; + self.state = .string_surrogate_half_backslash_u_3; + continue :state_loop; + }, + 'A'...'F' => { + self.cursor += 1; + self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4; + self.state = .string_surrogate_half_backslash_u_3; + continue :state_loop; + }, + 'a'...'f' => { + self.cursor += 1; + self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4; + self.state = .string_surrogate_half_backslash_u_3; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .string_surrogate_half_backslash_u_3 => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + const c = self.input[self.cursor]; + switch (c) { + '0'...'9' => { + self.utf16_code_units[1] |= c - '0'; + }, + 'A'...'F' => { + self.utf16_code_units[1] |= c - 'A' + 10; + }, + 'a'...'f' => { + self.utf16_code_units[1] |= c - 'a' + 10; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable; + return partialStringCodepoint(code_point); + }, + + .string_utf8_last_byte => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 0x80...0xBF => { + self.cursor += 1; + self.state = .string; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_second_to_last_byte => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 0x80...0xBF => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. 
+ } + }, + .string_utf8_second_to_last_byte_guard_against_overlong => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 0xA0...0xBF => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_second_to_last_byte_guard_against_surrogate_half => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 0x80...0x9F => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_third_to_last_byte => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 0x80...0xBF => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_third_to_last_byte_guard_against_overlong => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 0x90...0xBF => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_third_to_last_byte_guard_against_too_large => { + if (self.cursor >= self.input.len) return self.endOfBufferInString(); + switch (self.input[self.cursor]) { + 0x80...0x8F => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. 
+ } + }, + + .literal_t => { + switch (try self.expectByte()) { + 'r' => { + self.cursor += 1; + self.state = .literal_tr; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_tr => { + switch (try self.expectByte()) { + 'u' => { + self.cursor += 1; + self.state = .literal_tru; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_tru => { + switch (try self.expectByte()) { + 'e' => { + self.cursor += 1; + self.state = .post_value; + return .true; + }, + else => return error.SyntaxError, + } + }, + .literal_f => { + switch (try self.expectByte()) { + 'a' => { + self.cursor += 1; + self.state = .literal_fa; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_fa => { + switch (try self.expectByte()) { + 'l' => { + self.cursor += 1; + self.state = .literal_fal; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_fal => { + switch (try self.expectByte()) { + 's' => { + self.cursor += 1; + self.state = .literal_fals; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_fals => { + switch (try self.expectByte()) { + 'e' => { + self.cursor += 1; + self.state = .post_value; + return .false; + }, + else => return error.SyntaxError, + } + }, + .literal_n => { + switch (try self.expectByte()) { + 'u' => { + self.cursor += 1; + self.state = .literal_nu; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_nu => { + switch (try self.expectByte()) { + 'l' => { + self.cursor += 1; + self.state = .literal_nul; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_nul => { + switch (try self.expectByte()) { + 'l' => { + self.cursor += 1; + self.state = .post_value; + return .null; + }, + else => return error.SyntaxError, + } + }, + } + unreachable; + } +} + +/// Seeks ahead in the input until the first byte of the next token (or the end of the input) +/// 
/// determines which type of token will be returned from the next `next*()` call.
/// This function is idempotent, only advancing past commas, colons, and inter-token whitespace.
///
/// Returns `error.SyntaxError` when the next byte cannot start a token that is legal in the
/// current state. Errors from `skipWhitespaceExpectByte()` and `skipWhitespaceCheckEnd()`
/// (running out of input) are propagated unchanged.
pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
    state_loop: while (true) {
        switch (self.state) {
            .value => {
                // The first non-whitespace byte alone decides the kind of value.
                switch (try self.skipWhitespaceExpectByte()) {
                    '{' => return .object_begin,
                    '[' => return .array_begin,
                    '"' => return .string,
                    '-', '0'...'9' => return .number,
                    't' => return .true,
                    'f' => return .false,
                    'n' => return .null,
                    else => return error.SyntaxError,
                }
            },

            .post_value => {
                if (try self.skipWhitespaceCheckEnd()) return .end_of_document;

                const c = self.input[self.cursor];
                if (self.string_is_object_key) {
                    // The value just finished was an object key: only ':' may follow.
                    // The colon is consumed here and the loop re-runs in the `.value` state.
                    self.string_is_object_key = false;
                    switch (c) {
                        ':' => {
                            self.cursor += 1;
                            self.state = .value;
                            continue :state_loop;
                        },
                        else => return error.SyntaxError,
                    }
                }

                switch (c) {
                    '}' => return .object_end,
                    ']' => return .array_end,
                    ',' => {
                        // Consume the comma; the innermost container decides whether
                        // a key (object) or a value (array) has to come next.
                        switch (self.stack.peek()) {
                            OBJECT_MODE => {
                                self.state = .object_post_comma;
                            },
                            ARRAY_MODE => {
                                self.state = .value;
                            },
                        }
                        self.cursor += 1;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },

            .object_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => return .string,
                    '}' => return .object_end,
                    else => return error.SyntaxError,
                }
            },
            .object_post_comma => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => return .string,
                    else => return error.SyntaxError,
                }
            },

            .array_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    ']' => return .array_end,
                    else => {
                        // Anything else must be the first element; classify it as a value.
                        self.state = .value;
                        continue :state_loop;
                    },
                }
            },

            // All remaining states are in the middle of a token:
            // report that token's kind without looking at the input.
            .number_minus,
            .number_leading_zero,
            .number_int,
            .number_post_dot,
            .number_frac,
            .number_post_e,
            .number_post_e_sign,
            .number_exp,
            => return .number,

            .string,
            .string_backslash,
            .string_backslash_u,
            .string_backslash_u_1,
            .string_backslash_u_2,
            .string_backslash_u_3,
            .string_surrogate_half,
            .string_surrogate_half_backslash,
            .string_surrogate_half_backslash_u,
            .string_surrogate_half_backslash_u_1,
            .string_surrogate_half_backslash_u_2,
            .string_surrogate_half_backslash_u_3,
            => return .string,

            .string_utf8_last_byte,
            .string_utf8_second_to_last_byte,
            .string_utf8_second_to_last_byte_guard_against_overlong,
            .string_utf8_second_to_last_byte_guard_against_surrogate_half,
            .string_utf8_third_to_last_byte,
            .string_utf8_third_to_last_byte_guard_against_overlong,
            .string_utf8_third_to_last_byte_guard_against_too_large,
            => return .string,

            .literal_t,
            .literal_tr,
            .literal_tru,
            => return .true,
            .literal_f,
            .literal_fa,
            .literal_fal,
            .literal_fals,
            => return .false,
            .literal_n,
            .literal_nu,
            .literal_nul,
            => return .null,
        }
        unreachable;
    }
}

/// Byte-level position of the scanner inside the JSON grammar.
/// The variants are grouped by the token they belong to: between values, inside a number,
/// inside a string (including escape sequences and multi-byte UTF-8), and inside the
/// `true` / `false` / `null` literals.
const State = enum {
    value,
    post_value,

    object_start,
    object_post_comma,

    array_start,

    number_minus,
    number_leading_zero,
    number_int,
    number_post_dot,
    number_frac,
    number_post_e,
    number_post_e_sign,
    number_exp,

    string,
    string_backslash,
    string_backslash_u,
    string_backslash_u_1,
    string_backslash_u_2,
    string_backslash_u_3,
    string_surrogate_half,
    string_surrogate_half_backslash,
    string_surrogate_half_backslash_u,
    string_surrogate_half_backslash_u_1,
    string_surrogate_half_backslash_u_2,
    string_surrogate_half_backslash_u_3,

    // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
    string_utf8_last_byte, // State A
    string_utf8_second_to_last_byte, // State B
    string_utf8_second_to_last_byte_guard_against_overlong, // State C
    string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
    string_utf8_third_to_last_byte, // State E
    string_utf8_third_to_last_byte_guard_against_overlong, // State F
    string_utf8_third_to_last_byte_guard_against_too_large, // State G

    literal_t,
    literal_tr,
    literal_tru,
    literal_f,
    literal_fa,
    literal_fal,
    literal_fals,
    literal_n,
    literal_nu,
    literal_nul,
};

/// Returns the byte at the cursor without consuming it.
/// When the cursor is at the end of the buffer, fails with `error.UnexpectedEndOfInput`
/// if no more input will arrive, and with `error.BufferUnderrun` otherwise.
fn expectByte(self: *const @This()) !u8 {
    if (self.cursor < self.input.len) {
        return self.input[self.cursor];
    }
    // No byte.
    if (self.is_end_of_input) return error.UnexpectedEndOfInput;
    return error.BufferUnderrun;
}

/// Advances the cursor past spaces, tabs, carriage returns and newlines, stopping at the
/// first other byte or at the end of the buffer.
/// When diagnostics are enabled, every newline updates the line bookkeeping.
fn skipWhitespace(self: *@This()) void {
    while (self.cursor < self.input.len) : (self.cursor += 1) {
        switch (self.input[self.cursor]) {
            // Whitespace
            ' ', '\t', '\r' => continue,
            '\n' => {
                if (self.diagnostics) |diag| {
                    diag.line_number += 1;
                    // This will count the newline itself,
                    // which means a straight-forward subtraction will give a 1-based column number.
                    diag.line_start_cursor = self.cursor;
                }
                continue;
            },
            else => return,
        }
    }
}

/// Skips whitespace, then returns the next byte as `expectByte()` does (same errors).
fn skipWhitespaceExpectByte(self: *@This()) !u8 {
    self.skipWhitespace();
    return self.expectByte();
}

/// Skips whitespace, then reports whether the document is cleanly finished.
/// Returns `true` when the input has ended and no container is open, and `false` when a
/// byte is available inside an open container.
/// Errors:
/// * `error.UnexpectedEndOfInput`: the input ended while a container is still open.
/// * `error.BufferUnderrun`: the buffer is exhausted but more input may follow.
/// * `error.SyntaxError`: a byte is present although no container is open.
fn skipWhitespaceCheckEnd(self: *@This()) !bool {
    self.skipWhitespace();
    if (self.cursor >= self.input.len) {
        // End of buffer.
        if (self.is_end_of_input) {
            // End of everything.
            if (self.stackHeight() == 0) {
                // We did it!
                return true;
            }
            return error.UnexpectedEndOfInput;
        }
        return error.BufferUnderrun;
    }
    if (self.stackHeight() == 0) return error.SyntaxError;
    return false;
}

/// Returns the input bytes from `value_start` up to the cursor and moves `value_start`
/// to the cursor, so the same bytes are not handed out twice.
fn takeValueSlice(self: *@This()) []const u8 {
    const slice = self.input[self.value_start..self.cursor];
    self.value_start = self.cursor;
    return slice;
}
/// Like `takeValueSlice()`, but leaves out the last `trailing_negative_offset` bytes before
/// the cursor. Returns `""` (without touching `value_start`) when that would leave nothing.
fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 {
    // Check if the escape sequence started before the current input buffer.
    // (The algebra here is awkward to avoid unsigned underflow,
    // but it's just making sure the slice on the next line isn't UB.)
    if (self.cursor <= self.value_start + trailing_negative_offset) return "";
    const slice = self.input[self.value_start .. self.cursor - trailing_negative_offset];
    // When trailing_negative_offset is non-zero, setting self.value_start doesn't matter,
    // because we always set it again while emitting the .partial_string_escaped_*.
    self.value_start = self.cursor;
    return slice;
}

/// Handles reaching the end of the buffer while inside a number.
/// If no more input will arrive, the number is complete: returns `.number` and moves to
/// `.post_value`, or fails with `error.UnexpectedEndOfInput` when `allow_end` is false
/// (the number cannot legally stop at this point).
/// Otherwise returns the bytes seen so far as `.partial_number`, or `error.BufferUnderrun`
/// when there are none to return.
fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
    const slice = self.takeValueSlice();
    if (self.is_end_of_input) {
        if (!allow_end) return error.UnexpectedEndOfInput;
        self.state = .post_value;
        return Token{ .number = slice };
    }
    if (slice.len == 0) return error.BufferUnderrun;
    return Token{ .partial_number = slice };
}

/// Handles reaching the end of the buffer while inside a string.
/// A string can never end at the end of input, so that case is `error.UnexpectedEndOfInput`.
/// Otherwise returns the bytes seen so far as `.partial_string`, excluding the bytes of an
/// escape sequence that is still in progress, or `error.BufferUnderrun` when nothing is left
/// to return.
fn endOfBufferInString(self: *@This()) !Token {
    if (self.is_end_of_input) return error.UnexpectedEndOfInput;
    const slice = self.takeValueSliceMinusTrailingOffset(switch (self.state) {
        // Don't include the escape sequence in the partial string.
        .string_backslash => 1,
        .string_backslash_u => 2,
        .string_backslash_u_1 => 3,
        .string_backslash_u_2 => 4,
        .string_backslash_u_3 => 5,
        .string_surrogate_half => 6,
        .string_surrogate_half_backslash => 7,
        .string_surrogate_half_backslash_u => 8,
        .string_surrogate_half_backslash_u_1 => 9,
        .string_surrogate_half_backslash_u_2 => 10,
        .string_surrogate_half_backslash_u_3 => 11,

        // Include everything up to the cursor otherwise.
        .string,
        .string_utf8_last_byte,
        .string_utf8_second_to_last_byte,
        .string_utf8_second_to_last_byte_guard_against_overlong,
        .string_utf8_second_to_last_byte_guard_against_surrogate_half,
        .string_utf8_third_to_last_byte,
        .string_utf8_third_to_last_byte_guard_against_overlong,
        .string_utf8_third_to_last_byte_guard_against_too_large,
        => 0,

        else => unreachable,
    });
    if (slice.len == 0) return error.BufferUnderrun;
    return Token{ .partial_string = slice };
}

/// Encodes `code_point` as UTF-8 and wraps the resulting 1 to 4 bytes in the
/// `.partial_string_escaped_N` token of matching length.
/// Callers must pass a code point that `std.unicode.utf8Encode` accepts; a failure is `unreachable`.
fn partialStringCodepoint(code_point: u21) Token {
    var buf: [4]u8 = undefined;
    switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) {
        1 => return Token{ .partial_string_escaped_1 = buf[0..1].* },
        2 => return Token{ .partial_string_escaped_2 = buf[0..2].* },
        3 => return Token{ .partial_string_escaped_3 = buf[0..3].* },
        4 => return Token{ .partial_string_escaped_4 = buf[0..4].* },
        else => unreachable,
    }
}

/// Scan the input and check for malformed JSON.
/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`.
/// Returns any errors from the allocator as-is, which is unlikely,
/// but can be caused by extreme nesting depth in the input.
pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool {
    var scanner = Scanner.initCompleteInput(allocator, s);
    defer scanner.deinit();

    while (true) {
        const token = scanner.next() catch |err| switch (err) {
            error.SyntaxError, error.UnexpectedEndOfInput => return false,
            error.OutOfMemory => return error.OutOfMemory,
            // The scanner was given the complete input, so it can never ask for more.
            error.BufferUnderrun => unreachable,
        };
        if (token == .end_of_document) break;
    }

    return true;
}

/// The parsing errors are divided into two categories:
/// * `SyntaxError` is for clearly malformed JSON documents,
///   such as giving an input document that isn't JSON at all.
/// * `UnexpectedEndOfInput` is for signaling that everything's been
///   valid so far, but the input appears to be truncated for some reason.
/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`.
pub const Error = error{ SyntaxError, UnexpectedEndOfInput };

/// Used by `json.reader`.
pub const default_buffer_size = 0x1000;

/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
/// ```
///  <document> = <value> .end_of_document
///  <value> =
///    | <object>
///    | <array>
///    | <number>
///    | <string>
///    | .true
///    | .false
///    | .null
///  <object> = .object_begin ( <string> <value> )* .object_end
///  <array> = .array_begin ( <value> )* .array_end
///  <number> = <It depends. See below.>
///  <string> = <It depends. See below.>
/// ```
///
/// What you get for `<number>` and `<string>` values depends on which `next*()` method you call:
///
/// ```
/// next():
///  <number> = ( .partial_number )* .number
///  <string> = ( <partial_string> )* .string
///  <partial_string> =
///    | .partial_string
///    | .partial_string_escaped_1
///    | .partial_string_escaped_2
///    | .partial_string_escaped_3
///    | .partial_string_escaped_4
///
/// nextAlloc*(..., .alloc_always):
///  <number> = .allocated_number
///  <string> = .allocated_string
///
/// nextAlloc*(..., .alloc_if_needed):
///  <number> =
///    | .number
///    | .allocated_number
///  <string> =
///    | .string
///    | .allocated_string
/// ```
///
/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value.
/// For number values, this is the representation of the number exactly as it appears in the input.
/// For strings, this is the content of the string after resolving escape sequences.
///
/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator.
/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations.
///
/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences.
/// To get a complete value in memory, you need to concatenate the values yourself.
/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result.
///
/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer.
/// The memory may become undefined during the next call to `json.Scanner.feedInput()`
/// or any `json.Reader` method whose return error set includes `json.Error`.
/// To keep the value persistently, it is recommended to make a copy or to use `.alloc_always`,
/// which makes a copy for you.
///
/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that
/// the previously partial value is completed with no additional bytes.
/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.)
/// `.partial_*` tokens never have `0` length.
///
/// The recommended strategy for using the different `next*()` methods is something like this:
///
/// When you're expecting an object key, use `.alloc_if_needed`.
/// You often don't need a copy of the key string to persist; you might just check which field it is.
/// In the case that the key happens to require an allocation, free it immediately after checking it.
///
/// When you're expecting a meaningful string value (such as on the right of a `:`),
/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document.
///
/// When you're expecting a number value, use `.alloc_if_needed`.
/// You're probably going to be parsing the string representation of the number into a numeric representation,
/// so you need the complete string representation only temporarily.
///
/// When you're skipping an unrecognized value, use `skipValue()`.
pub const Token = union(enum) {
    object_begin,
    object_end,
    array_begin,
    array_end,

    true,
    false,
    null,

    // Number payloads: the text of the number exactly as it appears in the input.
    number: []const u8,
    partial_number: []const u8,
    allocated_number: []u8,

    // String payloads: string content after escape sequences have been resolved.
    // The `partial_string_escaped_N` variants carry N bytes of UTF-8 by value.
    string: []const u8,
    partial_string: []const u8,
    partial_string_escaped_1: [1]u8,
    partial_string_escaped_2: [2]u8,
    partial_string_escaped_3: [3]u8,
    partial_string_escaped_4: [4]u8,
    allocated_string: []u8,

    end_of_document,
};

/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call.
pub const TokenType = enum {
    object_begin,
    object_end,
    array_begin,
    array_end,
    true,
    false,
    null,
    number,
    string,
    end_of_document,
};

/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);`
/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized.
/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()`
/// to get meaningful information from this.
pub const Diagnostics = struct {
    /// 1-based number of the line the cursor is on.
    line_number: u64 = 1,
    line_start_cursor: usize = @as(usize, @bitCast(@as(isize, -1))), // Start just "before" the input buffer to get a 1-based column for line 1.
    /// Added to the cursor by `getByteOffset()` to obtain an offset from the start of the whole input.
    total_bytes_before_current_input: u64 = 0,
    /// Points at the cursor being tracked. Left `undefined` by default,
    /// so the getters must not be called before diagnostics have been enabled on a source.
    cursor_pointer: *const usize = undefined,

    /// Starts at 1.
    pub fn getLine(self: *const @This()) u64 {
        return self.line_number;
    }
    /// Starts at 1.
    pub fn getColumn(self: *const @This()) u64 {
        // Wrapping subtraction: `line_start_cursor` starts out as the bit pattern of -1
        // (see its default value), so on the first line this yields `cursor + 1`.
        return self.cursor_pointer.* -% self.line_start_cursor;
    }
    /// Starts at 0. Measures the byte offset since the start of the input.
    pub fn getByteOffset(self: *const @This()) u64 {
        return self.total_bytes_before_current_input + self.cursor_pointer.*;
    }
};

/// See the documentation for `std.json.Token`.
pub const AllocWhen = enum { alloc_if_needed, alloc_always };

/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default.
/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`.
pub const default_max_value_len = 4 * 1024 * 1024;

/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader.
pub const Reader = struct {
    /// The tokenizer that does the actual scanning; created in streaming mode by `init()`.
    scanner: Scanner,
    /// The stream the input bytes are read from.
    reader: *std.Io.Reader,

    /// The allocator is only used to track `[]` and `{}` nesting levels.
    pub fn init(allocator: Allocator, io_reader: *std.Io.Reader) @This() {
        return .{
            .scanner = Scanner.initStreaming(allocator),
            .reader = io_reader,
        };
    }
    /// Deinitializes the scanner and sets `self` to `undefined`;
    /// the `Reader` must not be used afterwards.
    pub fn deinit(self: *@This()) void {
        self.scanner.deinit();
        self.* = undefined;
    }

    /// Calls `std.json.Scanner.enableDiagnostics`.
    pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
        self.scanner.enableDiagnostics(diagnostics);
    }

    // Error sets of the public methods: stream errors and JSON errors, plus allocator
    // errors where nesting or values may allocate, plus `ValueTooLong` for the
    // length-limited `*Alloc*` family.
    pub const NextError = std.Io.Reader.Error || Error || Allocator.Error;
    pub const SkipError = Reader.NextError;
    pub const AllocError = Reader.NextError || error{ValueTooLong};
    pub const PeekError = std.Io.Reader.Error || Error;

    /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
    /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
    pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) Reader.AllocError!Token {
        return self.nextAllocMax(allocator, when, default_max_value_len);
    }
    /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+ pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) Reader.AllocError!Token { + const token_type = try self.peekNextTokenType(); + switch (token_type) { + .number, .string => { + var value_list = ArrayList(u8).init(allocator); + errdefer { + value_list.deinit(); + } + if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| { + return if (token_type == .number) + Token{ .number = slice } + else + Token{ .string = slice }; + } else { + return if (token_type == .number) + Token{ .allocated_number = try value_list.toOwnedSlice() } + else + Token{ .allocated_string = try value_list.toOwnedSlice() }; + } + }, + + // Simple tokens never alloc. + .object_begin, + .object_end, + .array_begin, + .array_end, + .true, + .false, + .null, + .end_of_document, + => return try self.next(), + } + } + + /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` + pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) Reader.AllocError!?[]const u8 { + return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); + } + /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`. + pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) Reader.AllocError!?[]const u8 { + while (true) { + return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`. 
+ pub fn skipValue(self: *@This()) Reader.SkipError!void { + switch (try self.peekNextTokenType()) { + .object_begin, .array_begin => { + try self.skipUntilStackHeight(self.stackHeight()); + }, + .number, .string => { + while (true) { + switch (try self.next()) { + .partial_number, + .partial_string, + .partial_string_escaped_1, + .partial_string_escaped_2, + .partial_string_escaped_3, + .partial_string_escaped_4, + => continue, + + .number, .string => break, + + else => unreachable, + } + } + }, + .true, .false, .null => { + _ = try self.next(); + }, + + .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. + } + } + /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`. + pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) Reader.NextError!void { + while (true) { + return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// Calls `std.json.Scanner.stackHeight`. + pub fn stackHeight(self: *const @This()) usize { + return self.scanner.stackHeight(); + } + /// Calls `std.json.Scanner.ensureTotalStackCapacity`. + pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void { + try self.scanner.ensureTotalStackCapacity(height); + } + + /// See `std.json.Token` for documentation of this function. + pub fn next(self: *@This()) Reader.NextError!Token { + while (true) { + return self.scanner.next() catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// See `std.json.Scanner.peekNextTokenType()`. 
    pub fn peekNextTokenType(self: *@This()) Reader.PeekError!TokenType {
        while (true) {
            return self.scanner.peekNextTokenType() catch |err| switch (err) {
                error.BufferUnderrun => {
                    // The scanner ran out of buffered input: feed it more and retry.
                    try self.refillBuffer();
                    continue;
                },
                else => |other_err| return other_err,
            };
        }
    }

    /// Hands the scanner the next stretch of input from `reader`
    /// (whatever `peekGreedy(1)` returns, which is then tossed from the reader).
    /// At end of stream, tells the scanner that the input is complete instead.
    fn refillBuffer(self: *@This()) std.Io.Reader.Error!void {
        const input = self.reader.peekGreedy(1) catch |err| switch (err) {
            error.ReadFailed => return error.ReadFailed,
            error.EndOfStream => return self.scanner.endInput(),
        };
        self.reader.toss(input.len);
        self.scanner.feedInput(input);
    }
};

// Values kept on the scanner's nesting stack to tell apart the two container kinds.
const OBJECT_MODE = 0;
const ARRAY_MODE = 1;

/// Appends `buf` to `list`, failing with `error.ValueTooLong` when the resulting length
/// would exceed `max_value_len` or overflow `usize`. Allocation errors are propagated.
fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void {
    const new_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong;
    if (new_len > max_value_len) return error.ValueTooLong;
    try list.appendSlice(buf);
}

/// For the slice you get from a `Token.number` or `Token.allocated_number`,
/// this function returns true if the number doesn't contain any fraction or exponent components, and is not `-0`.
/// Note, the numeric value encoded by the value may still be an integer, such as `1.0`.
/// This function is meant to give a hint about whether integer parsing or float parsing should be used on the value.
/// This function will not give meaningful results on non-numeric input.
pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool {
    if (std.mem.eql(u8, value, "-0")) return false;
    return std.mem.indexOfAny(u8, value, ".eE") == null;
}

test {
    _ = @import("./scanner_test.zig");
}
diff --git a/lib/std/json/Stringify.zig b/lib/std/json/Stringify.zig
new file mode 100644
index 0000000000..4d79217a87
--- /dev/null
+++ b/lib/std/json/Stringify.zig
@@ -0,0 +1,999 @@
//! Writes JSON ([RFC8259](https://tools.ietf.org/html/rfc8259)) formatted data
//! to a stream.
//!
//! The sequence of method calls to write JSON content must follow this grammar:
//!
//! ```
//!  <once> = <value>
//!  <value> =
//!    | <object>
//!    | <array>
//!    | write
//!    | print
//!    | <writeRawStream>
//!  <object> = beginObject ( <field> <value> )* endObject
//!  <field> = objectField | objectFieldRaw | <objectFieldRawStream>
//!  <array> = beginArray ( <value> )* endArray
//!  <writeRawStream> = beginWriteRaw ( stream.writeAll )* endWriteRaw
//!  <objectFieldRawStream> = beginObjectFieldRaw ( stream.writeAll )* endObjectFieldRaw
//! ```

const std = @import("../std.zig");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const BitStack = std.BitStack;
const Stringify = @This();
const Writer = std.io.Writer;

// The two container kinds, as stored (one bit each) in `nesting_stack`.
const IndentationMode = enum(u1) {
    object = 0,
    array = 1,
};

/// Destination of all output.
writer: *Writer,
options: Options = .{},
/// Current container nesting depth; also drives the amount of indentation.
indent_level: usize = 0,
/// What has to be emitted before the next value or key:
/// nothing at the very beginning, nothing but indentation for the first item of a
/// container (`none`), a `,` after a completed value, or a `:` after an object key.
next_punctuation: enum {
    the_beginning,
    none,
    comma,
    colon,
} = .the_beginning,

// One bit per nesting level when checks are enabled, rounded up to whole bytes;
// takes no space otherwise.
nesting_stack: switch (safety_checks) {
    .checked_to_fixed_depth => |fixed_buffer_size| [(fixed_buffer_size + 7) >> 3]u8,
    .assumed_correct => void,
} = switch (safety_checks) {
    .checked_to_fixed_depth => @splat(0),
    .assumed_correct => {},
},

// Tracks an open `begin*Raw()` / `end*Raw()` pair; only present in safe build modes.
raw_streaming_mode: if (build_mode_has_safety)
    enum { none, value, objectField }
else
    void = if (build_mode_has_safety) .none else {},

const build_mode_has_safety = switch (@import("builtin").mode) {
    .Debug, .ReleaseSafe => true,
    .ReleaseFast, .ReleaseSmall => false,
};

/// The `safety_checks_hint` parameter determines how much memory is used to enable assertions that the above grammar is being followed,
/// e.g. tripping an assertion rather than allowing `endObject` to emit the final `}` in `[[[]]}`.
/// "Depth" in this context means the depth of nested `[]` or `{}` expressions
/// (or equivalently the amount of recursion on the `<value>` grammar expression above).
/// For example, emitting the JSON `[[[]]]` requires a depth of 3.
/// If `.checked_to_fixed_depth` is used, there is additionally an assertion that the nesting depth never exceeds the given limit.
/// `.checked_to_fixed_depth` embeds the storage required in the `Stringify` struct.
/// `.assumed_correct` requires no space and performs none of these assertions.
/// In `ReleaseFast` and `ReleaseSmall` mode, the given `safety_checks_hint` is ignored and is always treated as `.assumed_correct`.
const safety_checks_hint: union(enum) {
    /// Rounded up to the nearest multiple of 8.
    checked_to_fixed_depth: usize,
    assumed_correct,
} = .{ .checked_to_fixed_depth = 256 };

// The hint as actually applied: honored only in build modes that have safety checks.
const safety_checks: @TypeOf(safety_checks_hint) = if (build_mode_has_safety)
    safety_checks_hint
else
    .assumed_correct;

pub const Error = Writer.Error;

/// Starts an array value: emits any pending punctuation and indentation, then `[`.
pub fn beginArray(self: *Stringify) Error!void {
    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
    try self.valueStart();
    try self.writer.writeByte('[');
    try self.pushIndentation(.array);
    // The next item is the first in this container: indentation, but no comma.
    self.next_punctuation = .none;
}

/// Starts an object value: emits any pending punctuation and indentation, then `{`.
pub fn beginObject(self: *Stringify) Error!void {
    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
    try self.valueStart();
    try self.writer.writeByte('{');
    try self.pushIndentation(.object);
    // The next item is the first in this container: indentation, but no comma.
    self.next_punctuation = .none;
}

/// Ends the innermost container, which must be an array (asserted when checks are enabled),
/// and emits `]`.
pub fn endArray(self: *Stringify) Error!void {
    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
    // Pop first, so the closing bracket is indented at the level of the opening one.
    self.popIndentation(.array);
    switch (self.next_punctuation) {
        // Empty container: the closing bracket directly follows the opening one.
        .none => {},
        .comma => {
            try self.indent();
        },
        .the_beginning, .colon => unreachable,
    }
    try self.writer.writeByte(']');
    self.valueDone();
}

/// Ends the innermost container, which must be an object (asserted when checks are enabled),
/// and emits `}`.
pub fn endObject(self: *Stringify) Error!void {
    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
    // Pop first, so the closing brace is indented at the level of the opening one.
    self.popIndentation(.object);
    switch (self.next_punctuation) {
        // Empty container: the closing brace directly follows the opening one.
        .none => {},
        .comma => {
            try self.indent();
        },
        .the_beginning, .colon => unreachable,
    }
    try self.writer.writeByte('}');
    self.valueDone();
}

/// Enters one nesting level. With checks enabled, also records the container kind in
/// `nesting_stack` (capacity is assumed, not checked here).
fn pushIndentation(self: *Stringify, mode: IndentationMode) !void {
    switch (safety_checks) {
        .checked_to_fixed_depth => {
            BitStack.pushWithStateAssumeCapacity(&self.nesting_stack, &self.indent_level, @intFromEnum(mode));
        },
        .assumed_correct => {
            self.indent_level += 1;
        },
    }
}
/// Leaves one nesting level. With checks enabled, asserts that the container being closed
/// is of kind `expected_mode`.
fn popIndentation(self: *Stringify, expected_mode: IndentationMode) void {
    switch (safety_checks) {
        .checked_to_fixed_depth => {
            assert(BitStack.popWithState(&self.nesting_stack, &self.indent_level) == @intFromEnum(expected_mode));
        },
        .assumed_correct => {
            self.indent_level -= 1;
        },
    }
}

/// Emits a newline followed by the indentation for the current nesting level.
/// Emits nothing at all when the whitespace option is `.minified`.
fn indent(self: *Stringify) !void {
    var char: u8 = ' ';
    const n_chars = switch (self.options.whitespace) {
        .minified => return,
        .indent_1 => 1 * self.indent_level,
        .indent_2 => 2 * self.indent_level,
        .indent_3 => 3 * self.indent_level,
        .indent_4 => 4 * self.indent_level,
        .indent_8 => 8 * self.indent_level,
        .indent_tab => blk: {
            char = '\t';
            break :blk self.indent_level;
        },
    };
    try self.writer.writeByte('\n');
    try self.writer.splatByteAll(char, n_chars);
}

/// Emits whatever must precede a value; asserts (when checks are enabled) that a value,
/// not an object key, is expected here.
fn valueStart(self: *Stringify) !void {
    if (self.isObjectKeyExpected()) |is_it| assert(!is_it); // Call objectField*(), not write(), for object keys.
    return self.valueStartAssumeTypeOk();
}
/// Emits whatever must precede an object key; asserts (when checks are enabled) that a key
/// is expected here.
fn objectFieldStart(self: *Stringify) !void {
    if (self.isObjectKeyExpected()) |is_it| assert(is_it); // Expected write(), not objectField*().
    return self.valueStartAssumeTypeOk();
}
/// Emits the pending punctuation and indentation according to `next_punctuation`.
fn valueStartAssumeTypeOk(self: *Stringify) !void {
    assert(!self.isComplete()); // JSON document already complete.
    switch (self.next_punctuation) {
        .the_beginning => {
            // No indentation for the very beginning.
        },
        .none => {
            // First item in a container.
            try self.indent();
        },
        .comma => {
            // Subsequent item in a container.
            try self.writer.writeByte(',');
            try self.indent();
        },
        .colon => {
            try self.writer.writeByte(':');
            if (self.options.whitespace != .minified) {
                try self.writer.writeByte(' ');
            }
        },
    }
}
/// Records that a value was completed: anything that follows in the same container
/// needs a comma first.
fn valueDone(self: *Stringify) void {
    self.next_punctuation = .comma;
}

// Only when safety is enabled:
/// Returns whether the next item must be an object key: inside an object and not
/// directly after a key. Returns `null` when nesting is not tracked.
fn isObjectKeyExpected(self: *const Stringify) ?bool {
    switch (safety_checks) {
        .checked_to_fixed_depth => return self.indent_level > 0 and
            BitStack.peekWithState(&self.nesting_stack, self.indent_level) == @intFromEnum(IndentationMode.object) and
            self.next_punctuation != .colon,
        .assumed_correct => return null,
    }
}
/// True once a complete top-level value has been written.
fn isComplete(self: *const Stringify) bool {
    return self.indent_level == 0 and self.next_punctuation == .comma;
}

/// An alternative to calling `write` that formats a value with `std.fmt`.
/// This function does the usual punctuation and indentation formatting
/// assuming the resulting formatted string represents a single complete value;
/// e.g. `"1"`, `"[]"`, `"[1,2]"`, not `"1,2"`.
/// This function may be useful for doing your own number formatting.
+pub fn print(self: *Stringify, comptime fmt: []const u8, args: anytype) Error!void {
+    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+    try self.valueStart();
+    try self.writer.print(fmt, args);
+    self.valueDone();
+}
+
+test print {
+    var out_buf: [1024]u8 = undefined;
+    var out: Writer = .fixed(&out_buf);
+
+    var w: Stringify = .{ .writer = &out, .options = .{ .whitespace = .indent_2 } };
+
+    try w.beginObject();
+    try w.objectField("a");
+    try w.print("[ ]", .{});
+    try w.objectField("b");
+    try w.beginArray();
+    try w.print("[{s}] ", .{"[]"});
+    try w.print(" {}", .{12345});
+    try w.endArray();
+    try w.endObject();
+
+    // `.indent_2`: two spaces per nesting level. The printed text is copied
+    // verbatim after the indentation, so the leading space of " {}" and the
+    // trailing space of "[{s}] " both show up in the output.
+    const expected =
+        \\{
+        \\  "a": [ ],
+        \\  "b": [
+        \\    [[]] ,
+        \\     12345
+        \\  ]
+        \\}
+    ;
+    try std.testing.expectEqualStrings(expected, out.buffered());
+}
+
+/// An alternative to calling `write` that allows you to write directly to the `.writer` field, e.g. with `.writer.writeAll()`.
+/// Call `beginWriteRaw()`, then write a complete value (including any quotes if necessary) directly to the `.writer` field,
+/// then call `endWriteRaw()`.
+/// This can be useful for streaming very long strings into the output without needing it all buffered in memory.
+///
+/// Emits the pending punctuation/indentation before returning, so the caller's
+/// raw bytes land exactly where a value is expected. Fails only with the
+/// writer's error set, like the other public methods.
+pub fn beginWriteRaw(self: *Stringify) Error!void {
+    if (build_mode_has_safety) {
+        // Raw sections must not nest or overlap.
+        assert(self.raw_streaming_mode == .none);
+        self.raw_streaming_mode = .value;
+    }
+    try self.valueStart();
+}
+
+/// See `beginWriteRaw`.
+/// Marks the raw value as finished so the next sibling is preceded by a comma.
+pub fn endWriteRaw(self: *Stringify) void {
+    if (build_mode_has_safety) {
+        assert(self.raw_streaming_mode == .value);
+        self.raw_streaming_mode = .none;
+    }
+    self.valueDone();
+}
+
+/// See `Stringify` for when to call this method.
+/// `key` is the string content of the property name.
+/// Surrounding quotes will be added and any special characters will be escaped.
+/// See also `objectFieldRaw`.
+pub fn objectField(self: *Stringify, key: []const u8) Error!void {
+    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+    try self.objectFieldStart();
+    try encodeJsonString(key, self.options, self.writer);
+    // The value that follows must be introduced by ':'.
+    self.next_punctuation = .colon;
+}
+/// See `Stringify` for when to call this method.
+/// `quoted_key` is the complete bytes of the key including quotes and any necessary escape sequences.
+/// A few assertions are performed on the given value to ensure that the caller of this function understands the API contract.
+/// See also `objectField`.
+pub fn objectFieldRaw(self: *Stringify, quoted_key: []const u8) Error!void {
+    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+    assert(quoted_key.len >= 2 and quoted_key[0] == '"' and quoted_key[quoted_key.len - 1] == '"'); // quoted_key should be "quoted".
+    try self.objectFieldStart();
+    // Written as-is: no escaping is applied to a raw key.
+    try self.writer.writeAll(quoted_key);
+    self.next_punctuation = .colon;
+}
+
+/// In the rare case that you need to write very long object field names,
+/// this is an alternative to `objectField` and `objectFieldRaw` that allows you to write directly to the `.writer` field
+/// similar to `beginWriteRaw`.
+/// Call `endObjectFieldRaw()` when you're done.
+///
+/// Emits the pending punctuation/indentation before returning. Fails only with
+/// the writer's error set, like the other public methods.
+pub fn beginObjectFieldRaw(self: *Stringify) Error!void {
+    if (build_mode_has_safety) {
+        // Raw sections must not nest or overlap.
+        assert(self.raw_streaming_mode == .none);
+        self.raw_streaming_mode = .objectField;
+    }
+    try self.objectFieldStart();
+}
+
+/// See `beginObjectFieldRaw`.
+/// Marks the raw key as finished so the following value is introduced by ':'.
+pub fn endObjectFieldRaw(self: *Stringify) void {
+    if (build_mode_has_safety) {
+        assert(self.raw_streaming_mode == .objectField);
+        self.raw_streaming_mode = .none;
+    }
+    self.next_punctuation = .colon;
+}
+
+/// Renders the given Zig value as JSON.
+///
+/// Supported types:
+/// * Zig `bool` -> JSON `true` or `false`.
+/// * Zig `?T` -> `null` or the rendering of `T`.
+/// * Zig `i32`, `u64`, etc. -> JSON number or string.
+///     * When option `emit_nonportable_numbers_as_strings` is true, a value outside the range `+-1<<53` (the precise integer range of f64) is rendered as a JSON string in base 10. Otherwise it is rendered as a JSON number.
+/// * Zig floats -> JSON number or string.
+///     * A value that cannot be precisely represented by an f64 is rendered as a JSON string. Otherwise it is rendered as a JSON number.
+///     * TODO: Float rendering will likely change in the future, e.g. to remove the unnecessary "e+00".
+/// * Zig `[]const u8`, `[]u8`, `*[N]u8`, `@Vector(N, u8)`, and similar -> JSON string.
+///     * See `Options.emit_strings_as_arrays`.
+///     * Content that is not valid UTF-8 is rendered as an array of numbers instead.
+/// * Zig `[]T`, `[N]T`, `*[N]T`, `@Vector(N, T)`, and similar -> JSON array of the rendering of each item.
+/// * Zig tuple -> JSON array of the rendering of each item.
+/// * Zig `struct` -> JSON object with each field in declaration order.
+///     * Fields of type `void` are skipped.
+///     * If the struct declares a method `pub fn jsonStringify(self: @This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `Stringify`. See `std.json.Value` for an example.
+///     * See `Options.emit_null_optional_fields`.
+/// * Zig `union(enum)` -> JSON object with one field named for the active tag and a value representing the payload.
+///     * If the payload is `void`, then the emitted value is `{}`.
+///     * If the union declares a `jsonStringify` method of the same shape, it is called instead of the default behavior.
+/// * Zig `enum` -> JSON string naming the active tag.
+///     * If the enum declares a `jsonStringify` method of the same shape, it is called instead of the default behavior.
+///     * If the enum is non-exhaustive, unnamed values are rendered as integers.
+/// * Zig untyped enum literal -> JSON string naming the active tag.
+/// * Zig error -> JSON string naming the error.
+/// * Zig `*T` -> the rendering of `T`. Note there is no guard against circular-reference infinite recursion.
+///
+/// See also alternative functions `print` and `beginWriteRaw`.
+/// For writing object field names, use `objectField` instead.
+pub fn write(self: *Stringify, v: anytype) Error!void {
+    if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+    const T = @TypeOf(v);
+    switch (@typeInfo(T)) {
+        .int => {
+            try self.valueStart();
+            const as_string = self.options.emit_nonportable_numbers_as_strings and
+                (v <= -(1 << 53) or v >= (1 << 53));
+            if (as_string) {
+                try self.writer.print("\"{}\"", .{v});
+            } else {
+                try self.writer.print("{}", .{v});
+            }
+            self.valueDone();
+            return;
+        },
+        .comptime_int => {
+            // Give the literal the smallest integer type that holds it.
+            return self.write(@as(std.math.IntFittingRange(v, v), v));
+        },
+        .float, .comptime_float => {
+            try self.valueStart();
+            if (@as(f64, @floatCast(v)) == v) {
+                // Survives the round trip through f64: plain JSON number.
+                try self.writer.print("{}", .{@as(f64, @floatCast(v))});
+            } else {
+                // Would lose precision as an f64: quote it.
+                try self.writer.print("\"{}\"", .{v});
+            }
+            self.valueDone();
+            return;
+        },
+
+        .bool => {
+            try self.valueStart();
+            try self.writer.writeAll(if (v) "true" else "false");
+            self.valueDone();
+            return;
+        },
+        .null => {
+            try self.valueStart();
+            try self.writer.writeAll("null");
+            self.valueDone();
+            return;
+        },
+        .optional => {
+            if (v) |payload| {
+                return self.write(payload);
+            }
+            return self.write(null);
+        },
+        .@"enum" => |enum_info| {
+            if (std.meta.hasFn(T, "jsonStringify")) {
+                return v.jsonStringify(self);
+            }
+
+            if (!enum_info.is_exhaustive) {
+                // A value with no matching named tag is written as its integer.
+                inline for (enum_info.fields) |field| {
+                    if (v == @field(T, field.name)) {
+                        break;
+                    }
+                } else {
+                    return self.write(@intFromEnum(v));
+                }
+            }
+
+            return self.stringValue(@tagName(v));
+        },
+        .enum_literal => {
+            return self.stringValue(@tagName(v));
+        },
+        .@"union" => |info| {
+            if (std.meta.hasFn(T, "jsonStringify")) {
+                return v.jsonStringify(self);
+            }
+
+            const UnionTagType = info.tag_type orelse
+                @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'");
+
+            try self.beginObject();
+            inline for (info.fields) |u_field| {
+                if (v == @field(UnionTagType, u_field.name)) {
+                    try self.objectField(u_field.name);
+                    if (u_field.type == void) {
+                        // void v is {}
+                        try self.beginObject();
+                        try self.endObject();
+                    } else {
+                        try self.write(@field(v, u_field.name));
+                    }
+                    break;
+                }
+            } else {
+                unreachable; // No active tag?
+            }
+            try self.endObject();
+            return;
+        },
+        .@"struct" => |S| {
+            if (std.meta.hasFn(T, "jsonStringify")) {
+                return v.jsonStringify(self);
+            }
+
+            if (S.is_tuple) try self.beginArray() else try self.beginObject();
+
+            inline for (S.fields) |Field| {
+                // don't include void fields
+                if (Field.type == void) continue;
+
+                // don't include optional fields that are null when emit_null_optional_fields is set to false
+                const emit_field = if (@typeInfo(Field.type) == .optional)
+                    (self.options.emit_null_optional_fields or @field(v, Field.name) != null)
+                else
+                    true;
+
+                if (emit_field) {
+                    if (!S.is_tuple) {
+                        try self.objectField(Field.name);
+                    }
+                    try self.write(@field(v, Field.name));
+                }
+            }
+
+            if (S.is_tuple) try self.endArray() else try self.endObject();
+            return;
+        },
+        .error_set => return self.stringValue(@errorName(v)),
+        .pointer => |ptr_info| switch (ptr_info.size) {
+            .one => switch (@typeInfo(ptr_info.child)) {
+                .array => {
+                    // Coerce `*[N]T` to `[]const T`.
+                    const Slice = []const std.meta.Elem(ptr_info.child);
+                    return self.write(@as(Slice, v));
+                },
+                else => {
+                    return self.write(v.*);
+                },
+            },
+            .many, .slice => {
+                if (ptr_info.size == .many and ptr_info.sentinel() == null)
+                    @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel");
+                const slice = if (ptr_info.size == .many) std.mem.span(v) else v;
+
+                if (ptr_info.child == u8) {
+                    // This is a []const u8, or some similar Zig string.
+                    const as_array = self.options.emit_strings_as_arrays;
+                    if (!as_array and std.unicode.utf8ValidateSlice(slice)) {
+                        return self.stringValue(slice);
+                    }
+                }
+
+                try self.beginArray();
+                for (slice) |item| try self.write(item);
+                try self.endArray();
+                return;
+            },
+            else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
+        },
+        .array => {
+            // Coerce `[N]T` to `*const [N]T` (and then to `[]const T`).
+            return self.write(&v);
+        },
+        .vector => |info| {
+            // Copy into an array so the slice path above handles it.
+            const array: [info.len]info.child = v;
+            return self.write(&array);
+        },
+        else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
+    }
+    unreachable;
+}
+
+/// Emits `s` as a quoted, escaped JSON string value with the usual
+/// punctuation before it.
+fn stringValue(self: *Stringify, s: []const u8) !void {
+    try self.valueStart();
+    try encodeJsonString(s, self.options, self.writer);
+    self.valueDone();
+}
+
+pub const Options = struct {
+    /// Controls the whitespace emitted.
+    /// The default `.minified` is a compact encoding with no whitespace between tokens.
+    /// Any setting other than `.minified` will use newlines, indentation, and a space after each ':'.
+    /// `.indent_1` means 1 space for each indentation level, `.indent_2` means 2 spaces, etc.
+    /// `.indent_tab` uses a tab for each indentation level.
+    whitespace: enum {
+        minified,
+        indent_1,
+        indent_2,
+        indent_3,
+        indent_4,
+        indent_8,
+        indent_tab,
+    } = .minified,
+
+    /// Should optional fields with null value be written?
+    emit_null_optional_fields: bool = true,
+
+    /// Arrays/slices of u8 are typically encoded as JSON strings.
+    /// This option emits them as arrays of numbers instead.
+    /// Does not affect calls to `objectField*()`.
+    emit_strings_as_arrays: bool = false,
+
+    /// Should unicode characters be escaped in strings?
+    escape_unicode: bool = false,
+
+    /// When true, renders numbers outside the range `+-1<<53` (the precise integer range of f64) as JSON strings in base 10.
+    emit_nonportable_numbers_as_strings: bool = false,
+};
+
+/// One-shot serialization: renders `v` as a JSON document into `writer`
+/// using `options`.
+/// See `Stringify` for how the given value is serialized into JSON.
+/// The maximum nesting depth of the output JSON document is 256.
+pub fn value(v: anytype, options: Options, writer: *Writer) Error!void {
+    var stringify: Stringify = .{ .writer = writer, .options = options };
+    return stringify.write(v);
+}
+
+test value {
+    var out: std.io.Writer.Allocating = .init(std.testing.allocator);
+    const sink = &out.writer;
+    defer out.deinit();
+
+    const T = struct { a: i32, b: []const u8 };
+    try value(T{ .a = 123, .b = "xy" }, .{}, sink);
+    try std.testing.expectEqualSlices(u8, "{\"a\":123,\"b\":\"xy\"}", out.getWritten());
+
+    // Integers beyond the exact f64 range: number by default, string on request.
+    try testStringify("9999999999999999", 9999999999999999, .{});
+    try testStringify("\"9999999999999999\"", 9999999999999999, .{ .emit_nonportable_numbers_as_strings = true });
+
+    // Vectors behave like arrays, including the u8-as-string rule.
+    try testStringify("[1,1]", @as(@Vector(2, u32), @splat(1)), .{});
+    try testStringify("\"AA\"", @as(@Vector(2, u8), @splat('A')), .{});
+    try testStringify("[65,65]", @as(@Vector(2, u8), @splat('A')), .{ .emit_strings_as_arrays = true });
+
+    // void field
+    try testStringify("{\"foo\":42}", struct {
+        foo: u32,
+        bar: void = {},
+    }{ .foo = 42 }, .{});
+
+    const Tuple = struct { []const u8, usize };
+    try testStringify("[\"foo\",42]", Tuple{ "foo", 42 }, .{});
+
+    // The serializer must also be usable at compile time.
+    comptime {
+        testStringify("false", false, .{}) catch unreachable;
+        const MyStruct = struct { foo: u32 };
+        testStringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
+            MyStruct{ .foo = 42 },
+            MyStruct{ .foo = 100 },
+            MyStruct{ .foo = 1000 },
+        }, .{}) catch unreachable;
+    }
+}
+
+/// Like `value`, but collects the output in memory allocated from `gpa`
+/// instead of taking a writer. Any failure is reported as `error.OutOfMemory`.
+///
+/// Caller owns returned memory.
+pub fn valueAlloc(gpa: Allocator, v: anytype, options: Options) error{OutOfMemory}![]u8 {
+    var allocating: std.io.Writer.Allocating = .init(gpa);
+    defer allocating.deinit();
+    value(v, options, &allocating.writer) catch return error.OutOfMemory;
+    return allocating.toOwnedSlice();
+}
+
+test valueAlloc {
+    const gpa = std.testing.allocator;
+    const expected =
+        \\{"foo":"bar","answer":42,"my_friend":"sammy"}
+    ;
+    const actual = try valueAlloc(gpa, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{});
+    defer gpa.free(actual);
+
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+fn outputUnicodeEscape(codepoint: u21, w: *Writer) Error!void {
+    if (codepoint <= 0xFFFF) {
+        // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
+        // then it may be represented as a six-character sequence: a reverse solidus, followed
+        // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
+        try w.writeAll("\\u");
+        try w.printInt(codepoint, 16, .lower, .{ .width = 4, .fill = '0' });
+    } else {
+        assert(codepoint <= 0x10FFFF);
+        // To escape an extended character that is not in the Basic Multilingual Plane,
+        // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
+        const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
+        const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
+        try w.writeAll("\\u");
+        try w.printInt(high, 16, .lower, .{ .width = 4, .fill = '0' });
+        try w.writeAll("\\u");
+        try w.printInt(low, 16, .lower, .{ .width = 4, .fill = '0' });
+    }
+}
+
+/// Writes the escape sequence for a byte that must never appear literally
+/// inside a JSON string: the short two-character form where one exists,
+/// otherwise a `\uXXXX` escape.
+fn outputSpecialEscape(c: u8, writer: *Writer) Error!void {
+    switch (c) {
+        '\\' => try writer.writeAll("\\\\"),
+        '\"' => try writer.writeAll("\\\""),
+        0x08 => try writer.writeAll("\\b"),
+        0x0C => try writer.writeAll("\\f"),
+        '\n' => try writer.writeAll("\\n"),
+        '\r' => try writer.writeAll("\\r"),
+        '\t' => try writer.writeAll("\\t"),
+        else => try outputUnicodeEscape(c, writer),
+    }
+}
+
+/// Write `string` to `writer` as a JSON encoded string.
+/// The surrounding double quotes are added; the content is escaped by
+/// `encodeJsonStringChars`.
+pub fn encodeJsonString(string: []const u8, options: Options, writer: *Writer) Error!void {
+    try writer.writeByte('\"');
+    try encodeJsonStringChars(string, options, writer);
+    try writer.writeByte('\"');
+}
+
+/// Write `chars` to `writer` as JSON encoded string characters.
+///
+/// Control characters, `"` and `\` are always escaped. With
+/// `options.escape_unicode`, byte 0x7F and every non-ASCII code point are
+/// written as `\uXXXX` escapes (a surrogate pair above U+FFFF); without it,
+/// bytes >= 0x20 other than `"` and `\` are copied through unchanged.
+///
+/// With `options.escape_unicode`, a byte that does not start a well-formed
+/// UTF-8 sequence (invalid start byte, truncated, overlong, or surrogate
+/// encoding) is emitted as `\ufffd` and scanning resumes at the next byte.
+pub fn encodeJsonStringChars(chars: []const u8, options: Options, writer: *Writer) Error!void {
+    // `chars[write_cursor..i]` is the pending run of bytes that need no
+    // escaping; it is flushed in one call whenever an escape is required.
+    var write_cursor: usize = 0;
+    var i: usize = 0;
+    if (options.escape_unicode) {
+        while (i < chars.len) : (i += 1) {
+            switch (chars[i]) {
+                // normal ascii character
+                0x20...0x21, 0x23...0x5B, 0x5D...0x7E => {},
+                0x00...0x1F, '\\', '\"' => {
+                    // Always must escape these.
+                    try writer.writeAll(chars[write_cursor..i]);
+                    try outputSpecialEscape(chars[i], writer);
+                    write_cursor = i + 1;
+                },
+                0x7F...0xFF => {
+                    try writer.writeAll(chars[write_cursor..i]);
+                    // Decode one UTF-8 sequence starting at `i`. Anything that
+                    // fails to decode falls back to U+FFFD for this single byte
+                    // rather than reaching `unreachable` or slicing past the end.
+                    var codepoint: u21 = 0xFFFD;
+                    var ulen: usize = 1;
+                    if (std.unicode.utf8ByteSequenceLength(chars[i])) |len| {
+                        if (len <= chars.len - i) {
+                            if (std.unicode.utf8Decode(chars[i..][0..len])) |decoded| {
+                                codepoint = decoded;
+                                ulen = len;
+                            } else |_| {}
+                        }
+                    } else |_| {}
+                    try outputUnicodeEscape(codepoint, writer);
+                    // Skip the continuation bytes; the loop adds the final +1.
+                    i += ulen - 1;
+                    write_cursor = i + 1;
+                },
+            }
+        }
+    } else {
+        while (i < chars.len) : (i += 1) {
+            switch (chars[i]) {
+                // normal bytes
+                0x20...0x21, 0x23...0x5B, 0x5D...0xFF => {},
+                0x00...0x1F, '\\', '\"' => {
+                    // Always must escape these.
+                    try writer.writeAll(chars[write_cursor..i]);
+                    try outputSpecialEscape(chars[i], writer);
+                    write_cursor = i + 1;
+                },
+            }
+        }
+    }
+    // Flush whatever unescaped tail remains.
+    try writer.writeAll(chars[write_cursor..chars.len]);
+}
+
+test "json write stream" {
+    var out_buf: [1024]u8 = undefined;
+    var out: Writer = .fixed(&out_buf);
+    var w: Stringify = .{ .writer = &out, .options = .{ .whitespace = .indent_2 } };
+    try testBasicWriteStream(&w);
+}
+
+/// Drives `w` through a mixed object/array document and compares the bytes
+/// buffered in its writer against the `.indent_2` rendering below.
+fn testBasicWriteStream(w: *Stringify) !void {
+    // Start from an empty buffer.
+    w.writer.end = 0;
+
+    try w.beginObject();
+
+    try w.objectField("object");
+    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena_allocator.deinit();
+    try w.write(try getJsonObject(arena_allocator.allocator()));
+
+    try w.objectFieldRaw("\"string\"");
+    try w.write("This is a string");
+
+    try w.objectField("array");
+    try w.beginArray();
+    try w.write("Another string");
+    try w.write(@as(i32, 1));
+    try w.write(@as(f32, 3.5));
+    try w.endArray();
+
+    try w.objectField("int");
+    try w.write(@as(i32, 10));
+
+    try w.objectField("float");
+    try w.write(@as(f32, 3.5));
+
+    try w.endObject();
+
+    // Two spaces per nesting level, matching `.indent_2`.
+    const expected =
+        \\{
+        \\  "object": {
+        \\    "one": 1,
+        \\    "two": 2
+        \\  },
+        \\  "string": "This is a string",
+        \\  "array": [
+        \\    "Another string",
+        \\    1,
+        \\    3.5
+        \\  ],
+        \\  "int": 10,
+        \\  "float": 3.5
+        \\}
+    ;
+    try 
std.testing.expectEqualStrings(expected, w.writer.buffered());
+}
+
+/// Builds the `std.json.Value` object used by `testBasicWriteStream`:
+/// key "one" holds the integer 1 and key "two" holds the float 2.0.
+/// The object's map allocates from `allocator`.
+fn getJsonObject(allocator: std.mem.Allocator) !std.json.Value {
+    var v: std.json.Value = .{ .object = std.json.ObjectMap.init(allocator) };
+    try v.object.put("one", std.json.Value{ .integer = @as(i64, @intCast(1)) });
+    try v.object.put("two", std.json.Value{ .float = 2.0 });
+    return v;
+}
+
+// Null optionals are written as `null` by default and dropped entirely when
+// `emit_null_optional_fields` is false.
+test "stringify null optional fields" {
+    const MyStruct = struct {
+        optional: ?[]const u8 = null,
+        required: []const u8 = "something",
+        another_optional: ?[]const u8 = null,
+        another_required: []const u8 = "something else",
+    };
+    try testStringify(
+        \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"}
+    ,
+        MyStruct{},
+        .{},
+    );
+    try testStringify(
+        \\{"required":"something","another_required":"something else"}
+    ,
+        MyStruct{},
+        .{ .emit_null_optional_fields = false },
+    );
+}
+
+// Scalars: booleans, null optionals, integers and floats of several widths,
+// and errors (rendered as their name).
+test "stringify basic types" {
+    try testStringify("false", false, .{});
+    try testStringify("true", true, .{});
+    try testStringify("null", @as(?u8, null), .{});
+    try testStringify("null", @as(?*u32, null), .{});
+    try testStringify("42", 42, .{});
+    try testStringify("42", 42.0, .{});
+    try testStringify("42", @as(u8, 42), .{});
+    try testStringify("42", @as(u128, 42), .{});
+    try testStringify("9999999999999999", 9999999999999999, .{});
+    try testStringify("42", @as(f32, 42), .{});
+    try testStringify("42", @as(f64, 42), .{});
+    try testStringify("\"ItBroke\"", @as(anyerror, error.ItBroke), .{});
+    try testStringify("\"ItBroke\"", error.ItBroke, .{});
+}
+
+// Each input is checked twice: with the default options (non-ASCII text is
+// copied through) and with `escape_unicode` (non-ASCII text becomes `\uXXXX`,
+// using a surrogate pair above U+FFFF).
+test "stringify string" {
+    try testStringify("\"hello\"", "hello", .{});
+    try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{});
+    try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{});
+    try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{80}\"", "with unicode\u{80}", .{});
+    try testStringify("\"with unicode\\u0080\"", "with unicode\u{80}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", .{});
+    try testStringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{100}\"", "with unicode\u{100}", .{});
+    try testStringify("\"with unicode\\u0100\"", "with unicode\u{100}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{800}\"", "with unicode\u{800}", .{});
+    try testStringify("\"with unicode\\u0800\"", "with unicode\u{800}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", .{});
+    try testStringify("\"with unicode\\u8000\"", "with unicode\u{8000}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", .{});
+    try testStringify("\"with unicode\\ud799\"", "with unicode\u{D799}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", .{});
+    try testStringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", .{ .escape_unicode = true });
+    try testStringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", .{});
+    try testStringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", .{ .escape_unicode = true });
+}
+
+// `[*:0]const u8` is measured up to its sentinel and then treated like a slice.
+test "stringify many-item sentinel-terminated string" {
+    try testStringify("\"hello\"", @as([*:0]const u8, "hello"), .{});
+    try testStringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), .{ .escape_unicode = true });
+    try testStringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), .{ .escape_unicode = true });
+}
+
+// Exhaustive enums are rendered as the tag name.
+test "stringify enums" {
+    const E = enum {
+        foo,
+        bar,
+    };
+    try testStringify("\"foo\"", E.foo, .{});
+    try testStringify("\"bar\"", E.bar, .{});
+}
+
+// Named tags are rendered as strings, unnamed values as integers.
+test "stringify non-exhaustive enum" {
+    const E = enum(u8) {
+        foo = 0,
+        _,
+    };
+    try testStringify("\"foo\"", E.foo, .{});
+    try testStringify("1", @as(E, @enumFromInt(1)), .{});
+}
+
+// Untyped enum literals are rendered as the literal's name.
+test "stringify enum literals" {
+    try testStringify("\"foo\"", .foo, .{});
+    try testStringify("\"bar\"", .bar, .{});
+}
+
+// A tagged union becomes a one-field object keyed by the active tag; a void
+// payload is rendered as `{}`.
+test "stringify tagged unions" {
+    const T = union(enum) {
+        nothing,
+        foo: u32,
+        bar: bool,
+    };
+    try testStringify("{\"nothing\":{}}", T{ .nothing = {} }, .{});
+    try testStringify("{\"foo\":42}", T{ .foo = 42 }, .{});
+    try testStringify("{\"bar\":true}", T{ .bar = true }, .{});
+}
+
+test "stringify struct" {
+    try testStringify("{\"foo\":42}", struct {
+        foo: u32,
+    }{ .foo = 42 }, .{});
+}
+
+test "emit_strings_as_arrays" {
+    // Should only affect string values, not object keys.
+    try testStringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, .{});
+    try testStringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, .{ .emit_strings_as_arrays = true });
+    // Should *not* affect these types:
+    try testStringify("\"foo\"", @as(enum { foo, bar }, .foo), .{ .emit_strings_as_arrays = true });
+    try testStringify("\"ItBroke\"", error.ItBroke, .{ .emit_strings_as_arrays = true });
+    // Should work on these:
+    try testStringify("\"bar\"", @Vector(3, u8){ 'b', 'a', 'r' }, .{});
+    try testStringify("[98,97,114]", @Vector(3, u8){ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true });
+    try testStringify("\"bar\"", [3]u8{ 'b', 'a', 'r' }, .{});
+    try testStringify("[98,97,114]", [3]u8{ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true });
+}
+
+// The same struct rendered with `.indent_4`, `.indent_tab` and `.minified`.
+test "stringify struct with indentation" {
+    // Four spaces per nesting level, matching `.indent_4`.
+    try testStringify(
+        \\{
+        \\    "foo": 42,
+        \\    "bar": [
+        \\        1,
+        \\        2,
+        \\        3
+        \\    ]
+        \\}
+    ,
+        struct {
+            foo: u32,
+            bar: [3]u32,
+        }{
+            .foo = 42,
+            .bar = .{ 1, 2, 3 },
+        },
+        .{ .whitespace = .indent_4 },
+    );
+    try testStringify(
+        "{\n\t\"foo\": 42,\n\t\"bar\": [\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}",
+        struct {
+            foo: u32,
+            bar: [3]u32,
+        }{
+            .foo = 42,
+            .bar = .{ 1, 2, 3 },
+        },
+        .{ .whitespace = .indent_tab },
+    );
+    try testStringify(
+        \\{"foo":42,"bar":[1,2,3]}
+    ,
+        struct {
+            foo: u32,
+            bar: [3]u32,
+        }{
+            .foo = 42,
+            .bar = .{ 1, 2, 3 },
+        },
+        .{ .whitespace = .minified },
+    );
+}
+
+test "stringify array of structs" {
+    const MyStruct = struct {
+        foo: u32,
+    };
+    try testStringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
+        MyStruct{ .foo = 42 },
+        MyStruct{ .foo = 100 },
+        MyStruct{ .foo = 1000 },
+    }, .{});
+}
+
+// A type that declares `jsonStringify` takes over its own rendering; the
+// default struct-to-object behavior is bypassed.
+test "stringify struct with custom stringifier" {
+    try testStringify("[\"something special\",42]", struct {
+        foo: u32,
+        const Self = @This();
+        pub fn jsonStringify(v: @This(), jws: anytype) !void {
+            _ = v;
+            try jws.beginArray();
+            try jws.write("something special");
+            try jws.write(42);
+            try jws.endArray();
+        }
+    }{ .foo = 42 }, .{});
+}
+
+/// Serializes `v` with `options` into a 4096-byte stack buffer and compares
+/// the bytes written against `expected`.
+fn testStringify(expected: []const u8, v: anytype, options: Options) !void {
+    var buffer: [4096]u8 = undefined;
+    var w: Writer = .fixed(&buffer);
+    try value(v, options, &w);
+    try std.testing.expectEqualStrings(expected, w.buffered());
+}
+
+// Key and value are each streamed to the writer in two pieces between the
+// begin*/end* raw calls.
+test "raw streaming" {
+    var out_buf: [1024]u8 = undefined;
+    var out: Writer = .fixed(&out_buf);
+
+    var w: Stringify = .{ .writer = &out, .options = .{ .whitespace = .indent_2 } };
+    try w.beginObject();
+    try w.beginObjectFieldRaw();
+    try w.writer.writeAll("\"long");
+    try w.writer.writeAll(" key\"");
+    w.endObjectFieldRaw();
+    try w.beginWriteRaw();
+    try w.writer.writeAll("\"long");
+    try w.writer.writeAll(" value\"");
+    w.endWriteRaw();
+    try w.endObject();
+
+    // Two spaces per nesting level, matching `.indent_2`.
+    const expected =
+        \\{
+        \\  "long key": "long value"
+        \\}
+    ;
+    try std.testing.expectEqualStrings(expected, w.writer.buffered());
+}
diff --git a/lib/std/json/dynamic.zig b/lib/std/json/dynamic.zig
index 4d24444390..b47e7e1067 100644
--- a/lib/std/json/dynamic.zig
+++ b/lib/std/json/dynamic.zig
@@ -4,17 +4,12 @@ const ArenaAllocator = std.heap.ArenaAllocator;
 const ArrayList = std.ArrayList;
 const StringArrayHashMap = std.StringArrayHashMap;
 const Allocator = std.mem.Allocator;
-
-const StringifyOptions = @import("./stringify.zig").StringifyOptions; -const stringify = @import("./stringify.zig").stringify; +const json = std.json; const ParseOptions = @import("./static.zig").ParseOptions; const ParseError = @import("./static.zig").ParseError; -const JsonScanner = @import("./scanner.zig").Scanner; -const AllocWhen = @import("./scanner.zig").AllocWhen; -const Token = @import("./scanner.zig").Token; -const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; +const isNumberFormattedLikeAnInteger = @import("Scanner.zig").isNumberFormattedLikeAnInteger; pub const ObjectMap = StringArrayHashMap(Value); pub const Array = ArrayList(Value); @@ -52,12 +47,11 @@ pub const Value = union(enum) { } } - pub fn dump(self: Value) void { - std.debug.lockStdErr(); - defer std.debug.unlockStdErr(); + pub fn dump(v: Value) void { + const w = std.debug.lockStderrWriter(&.{}); + defer std.debug.unlockStderrWriter(); - const stderr = std.fs.File.stderr().deprecatedWriter(); - stringify(self, .{}, stderr) catch return; + json.Stringify.value(v, .{}, w) catch return; } pub fn jsonStringify(value: @This(), jws: anytype) !void { diff --git a/lib/std/json/dynamic_test.zig b/lib/std/json/dynamic_test.zig index 1362e3cfad..9d991f2c50 100644 --- a/lib/std/json/dynamic_test.zig +++ b/lib/std/json/dynamic_test.zig @@ -1,8 +1,10 @@ const std = @import("std"); +const json = std.json; const mem = std.mem; const testing = std.testing; const ArenaAllocator = std.heap.ArenaAllocator; const Allocator = std.mem.Allocator; +const Writer = std.io.Writer; const ObjectMap = @import("dynamic.zig").ObjectMap; const Array = @import("dynamic.zig").Array; @@ -14,8 +16,7 @@ const parseFromTokenSource = @import("static.zig").parseFromTokenSource; const parseFromValueLeaky = @import("static.zig").parseFromValueLeaky; const ParseOptions = @import("static.zig").ParseOptions; -const jsonReader = @import("scanner.zig").reader; -const JsonReader = 
@import("scanner.zig").Reader; +const Scanner = @import("Scanner.zig"); test "json.parser.dynamic" { const s = @@ -70,14 +71,10 @@ test "json.parser.dynamic" { try testing.expect(mem.eql(u8, large_int.number_string, "18446744073709551615")); } -const writeStream = @import("./stringify.zig").writeStream; test "write json then parse it" { var out_buffer: [1000]u8 = undefined; - - var fixed_buffer_stream = std.io.fixedBufferStream(&out_buffer); - const out_stream = fixed_buffer_stream.writer(); - var jw = writeStream(out_stream, .{}); - defer jw.deinit(); + var fixed_writer: Writer = .fixed(&out_buffer); + var jw: json.Stringify = .{ .writer = &fixed_writer, .options = .{} }; try jw.beginObject(); @@ -101,8 +98,8 @@ test "write json then parse it" { try jw.endObject(); - fixed_buffer_stream = std.io.fixedBufferStream(fixed_buffer_stream.getWritten()); - var json_reader = jsonReader(testing.allocator, fixed_buffer_stream.reader()); + var fbs: std.Io.Reader = .fixed(fixed_writer.buffered()); + var json_reader: Scanner.Reader = .init(testing.allocator, &fbs); defer json_reader.deinit(); var parsed = try parseFromTokenSource(Value, testing.allocator, &json_reader, .{}); defer parsed.deinit(); @@ -242,10 +239,9 @@ test "Value.jsonStringify" { .{ .object = obj }, }; var buffer: [0x1000]u8 = undefined; - var fbs = std.io.fixedBufferStream(&buffer); + var fixed_writer: Writer = .fixed(&buffer); - var jw = writeStream(fbs.writer(), .{ .whitespace = .indent_1 }); - defer jw.deinit(); + var jw: json.Stringify = .{ .writer = &fixed_writer, .options = .{ .whitespace = .indent_1 } }; try jw.write(array); const expected = @@ -266,7 +262,7 @@ test "Value.jsonStringify" { \\ } \\] ; - try testing.expectEqualStrings(expected, fbs.getWritten()); + try testing.expectEqualStrings(expected, fixed_writer.buffered()); } test "parseFromValue(std.json.Value,...)" { @@ -334,8 +330,8 @@ test "polymorphic parsing" { test "long object value" { const value = "01234567890123456789"; const doc = 
"{\"key\":\"" ++ value ++ "\"}"; - var fbs = std.io.fixedBufferStream(doc); - var reader = smallBufferJsonReader(testing.allocator, fbs.reader()); + var fbs: std.Io.Reader = .fixed(doc); + var reader = smallBufferJsonReader(testing.allocator, &fbs); defer reader.deinit(); var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{}); defer parsed.deinit(); @@ -367,8 +363,8 @@ test "many object keys" { \\ "k5": "v5" \\} ; - var fbs = std.io.fixedBufferStream(doc); - var reader = smallBufferJsonReader(testing.allocator, fbs.reader()); + var fbs: std.Io.Reader = .fixed(doc); + var reader = smallBufferJsonReader(testing.allocator, &fbs); defer reader.deinit(); var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{}); defer parsed.deinit(); @@ -382,8 +378,8 @@ test "many object keys" { test "negative zero" { const doc = "-0"; - var fbs = std.io.fixedBufferStream(doc); - var reader = smallBufferJsonReader(testing.allocator, fbs.reader()); + var fbs: std.Io.Reader = .fixed(doc); + var reader = smallBufferJsonReader(testing.allocator, &fbs); defer reader.deinit(); var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{}); defer parsed.deinit(); @@ -391,6 +387,6 @@ test "negative zero" { try testing.expect(std.math.isNegativeZero(parsed.value.float)); } -fn smallBufferJsonReader(allocator: Allocator, io_reader: anytype) JsonReader(16, @TypeOf(io_reader)) { - return JsonReader(16, @TypeOf(io_reader)).init(allocator, io_reader); +fn smallBufferJsonReader(allocator: Allocator, io_reader: anytype) Scanner.Reader { + return .init(allocator, io_reader); } diff --git a/lib/std/json/fmt.zig b/lib/std/json/fmt.zig deleted file mode 100644 index c526dce1da..0000000000 --- a/lib/std/json/fmt.zig +++ /dev/null @@ -1,40 +0,0 @@ -const std = @import("../std.zig"); -const assert = std.debug.assert; - -const stringify = @import("stringify.zig").stringify; -const StringifyOptions = @import("stringify.zig").StringifyOptions; - -/// 
Returns a formatter that formats the given value using stringify. -pub fn fmt(value: anytype, options: StringifyOptions) Formatter(@TypeOf(value)) { - return Formatter(@TypeOf(value)){ .value = value, .options = options }; -} - -/// Formats the given value using stringify. -pub fn Formatter(comptime T: type) type { - return struct { - value: T, - options: StringifyOptions, - - pub fn format(self: @This(), writer: *std.io.Writer) std.io.Writer.Error!void { - try stringify(self.value, self.options, writer); - } - }; -} - -test fmt { - const expectFmt = std.testing.expectFmt; - try expectFmt("123", "{}", .{fmt(@as(u32, 123), .{})}); - try expectFmt( - \\{"num":927,"msg":"hello","sub":{"mybool":true}} - , "{}", .{fmt(struct { - num: u32, - msg: []const u8, - sub: struct { - mybool: bool, - }, - }{ - .num = 927, - .msg = "hello", - .sub = .{ .mybool = true }, - }, .{})}); -} diff --git a/lib/std/json/hashmap_test.zig b/lib/std/json/hashmap_test.zig index 49d8caffae..0544eaa68b 100644 --- a/lib/std/json/hashmap_test.zig +++ b/lib/std/json/hashmap_test.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const json = std.json; const testing = std.testing; const ArrayHashMap = @import("hashmap.zig").ArrayHashMap; @@ -7,10 +8,9 @@ const parseFromSlice = @import("static.zig").parseFromSlice; const parseFromSliceLeaky = @import("static.zig").parseFromSliceLeaky; const parseFromTokenSource = @import("static.zig").parseFromTokenSource; const parseFromValue = @import("static.zig").parseFromValue; -const stringifyAlloc = @import("stringify.zig").stringifyAlloc; const Value = @import("dynamic.zig").Value; -const jsonReader = @import("./scanner.zig").reader; +const Scanner = @import("Scanner.zig"); const T = struct { i: i32, @@ -39,8 +39,8 @@ test "parse json hashmap while streaming" { \\ "xyz": {"i": 1, "s": "w"} \\} ; - var stream = std.io.fixedBufferStream(doc); - var json_reader = jsonReader(testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(doc); + var 
json_reader: Scanner.Reader = .init(testing.allocator, &stream); var parsed = try parseFromTokenSource( ArrayHashMap(T), @@ -89,7 +89,7 @@ test "stringify json hashmap" { var value = ArrayHashMap(T){}; defer value.deinit(testing.allocator); { - const doc = try stringifyAlloc(testing.allocator, value, .{}); + const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{}); defer testing.allocator.free(doc); try testing.expectEqualStrings("{}", doc); } @@ -98,7 +98,7 @@ test "stringify json hashmap" { try value.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" }); { - const doc = try stringifyAlloc(testing.allocator, value, .{}); + const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{}); defer testing.allocator.free(doc); try testing.expectEqualStrings( \\{"abc":{"i":0,"s":"d"},"xyz":{"i":1,"s":"w"}} @@ -107,7 +107,7 @@ test "stringify json hashmap" { try testing.expect(value.map.swapRemove("abc")); { - const doc = try stringifyAlloc(testing.allocator, value, .{}); + const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{}); defer testing.allocator.free(doc); try testing.expectEqualStrings( \\{"xyz":{"i":1,"s":"w"}} @@ -116,7 +116,7 @@ test "stringify json hashmap" { try testing.expect(value.map.swapRemove("xyz")); { - const doc = try stringifyAlloc(testing.allocator, value, .{}); + const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{}); defer testing.allocator.free(doc); try testing.expectEqualStrings("{}", doc); } @@ -129,7 +129,7 @@ test "stringify json hashmap whitespace" { try value.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" }); { - const doc = try stringifyAlloc(testing.allocator, value, .{ .whitespace = .indent_2 }); + const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{ .whitespace = .indent_2 }); defer testing.allocator.free(doc); try testing.expectEqualStrings( \\{ diff --git a/lib/std/json/scanner.zig b/lib/std/json/scanner.zig deleted file mode 100644 index 
1836d6775b..0000000000 --- a/lib/std/json/scanner.zig +++ /dev/null @@ -1,1776 +0,0 @@ -// Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259 -// * RFC 8259 requires JSON documents be valid UTF-8, -// but makes an allowance for systems that are "part of a closed ecosystem". -// I have no idea what that's supposed to mean in the context of a standard specification. -// This implementation requires inputs to be valid UTF-8. -// * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits, -// but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed. -// (RFC 5234 defines HEXDIG to only allow uppercase.) -// * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value". -// See http://www.unicode.org/glossary/#unicode_scalar_value . -// * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences, -// but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?), -// which would mean that unpaired surrogate halves are forbidden. -// By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to) -// explicitly allows unpaired surrogate halves. -// This implementation forbids unpaired surrogate halves in \u sequences. -// If a high surrogate half appears in a \u sequence, -// then a low surrogate half must immediately follow in \u notation. -// * RFC 8259 allows implementations to "accept non-JSON forms or extensions". -// This implementation does not accept any of that. -// * RFC 8259 allows implementations to put limits on "the size of texts", -// "the maximum depth of nesting", "the range and precision of numbers", -// and "the length and character contents of strings". -// This low-level implementation does not limit these, -// except where noted above, and except that nesting depth requires memory allocation. 
-// Note that this low-level API does not interpret numbers numerically, -// but simply emits their source form for some higher level code to make sense of. -// * This low-level implementation allows duplicate object keys, -// and key/value pairs are emitted in the order they appear in the input. - -const std = @import("std"); - -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; -const assert = std.debug.assert; -const BitStack = std.BitStack; - -/// Scan the input and check for malformed JSON. -/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`. -/// Returns any errors from the allocator as-is, which is unlikely, -/// but can be caused by extreme nesting depth in the input. -pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool { - var scanner = Scanner.initCompleteInput(allocator, s); - defer scanner.deinit(); - - while (true) { - const token = scanner.next() catch |err| switch (err) { - error.SyntaxError, error.UnexpectedEndOfInput => return false, - error.OutOfMemory => return error.OutOfMemory, - error.BufferUnderrun => unreachable, - }; - if (token == .end_of_document) break; - } - - return true; -} - -/// The parsing errors are divided into two categories: -/// * `SyntaxError` is for clearly malformed JSON documents, -/// such as giving an input document that isn't JSON at all. -/// * `UnexpectedEndOfInput` is for signaling that everything's been -/// valid so far, but the input appears to be truncated for some reason. -/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`. -pub const Error = error{ SyntaxError, UnexpectedEndOfInput }; - -/// Calls `std.json.Reader` with `std.json.default_buffer_size`. -pub fn reader(allocator: Allocator, io_reader: anytype) Reader(default_buffer_size, @TypeOf(io_reader)) { - return Reader(default_buffer_size, @TypeOf(io_reader)).init(allocator, io_reader); -} -/// Used by `json.reader`. 
-pub const default_buffer_size = 0x1000; - -/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar: -/// ``` -/// = .end_of_document -/// = -/// | -/// | -/// | -/// | -/// | .true -/// | .false -/// | .null -/// = .object_begin ( )* .object_end -/// = .array_begin ( )* .array_end -/// = -/// = -/// ``` -/// -/// What you get for `` and `` values depends on which `next*()` method you call: -/// -/// ``` -/// next(): -/// = ( .partial_number )* .number -/// = ( )* .string -/// = -/// | .partial_string -/// | .partial_string_escaped_1 -/// | .partial_string_escaped_2 -/// | .partial_string_escaped_3 -/// | .partial_string_escaped_4 -/// -/// nextAlloc*(..., .alloc_always): -/// = .allocated_number -/// = .allocated_string -/// -/// nextAlloc*(..., .alloc_if_needed): -/// = -/// | .number -/// | .allocated_number -/// = -/// | .string -/// | .allocated_string -/// ``` -/// -/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value. -/// For number values, this is the representation of the number exactly as it appears in the input. -/// For strings, this is the content of the string after resolving escape sequences. -/// -/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator. -/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations. -/// -/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences. -/// To get a complete value in memory, you need to concatenate the values yourself. -/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result. -/// -/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer. 
-/// The memory may become undefined during the next call to `json.Scanner.feedInput()` -/// or any `json.Reader` method whose return error set includes `json.Error`. -/// To keep the value persistently, it recommended to make a copy or to use `.alloc_always`, -/// which makes a copy for you. -/// -/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that -/// the previously partial value is completed with no additional bytes. -/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.) -/// `.partial_*` tokens never have `0` length. -/// -/// The recommended strategy for using the different `next*()` methods is something like this: -/// -/// When you're expecting an object key, use `.alloc_if_needed`. -/// You often don't need a copy of the key string to persist; you might just check which field it is. -/// In the case that the key happens to require an allocation, free it immediately after checking it. -/// -/// When you're expecting a meaningful string value (such as on the right of a `:`), -/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document. -/// -/// When you're expecting a number value, use `.alloc_if_needed`. -/// You're probably going to be parsing the string representation of the number into a numeric representation, -/// so you need the complete string representation only temporarily. -/// -/// When you're skipping an unrecognized value, use `skipValue()`. 
-pub const Token = union(enum) { - object_begin, - object_end, - array_begin, - array_end, - - true, - false, - null, - - number: []const u8, - partial_number: []const u8, - allocated_number: []u8, - - string: []const u8, - partial_string: []const u8, - partial_string_escaped_1: [1]u8, - partial_string_escaped_2: [2]u8, - partial_string_escaped_3: [3]u8, - partial_string_escaped_4: [4]u8, - allocated_string: []u8, - - end_of_document, -}; - -/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call. -pub const TokenType = enum { - object_begin, - object_end, - array_begin, - array_end, - true, - false, - null, - number, - string, - end_of_document, -}; - -/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);` -/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized. -/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()` -/// to get meaningful information from this. -pub const Diagnostics = struct { - line_number: u64 = 1, - line_start_cursor: usize = @as(usize, @bitCast(@as(isize, -1))), // Start just "before" the input buffer to get a 1-based column for line 1. - total_bytes_before_current_input: u64 = 0, - cursor_pointer: *const usize = undefined, - - /// Starts at 1. - pub fn getLine(self: *const @This()) u64 { - return self.line_number; - } - /// Starts at 1. - pub fn getColumn(self: *const @This()) u64 { - return self.cursor_pointer.* -% self.line_start_cursor; - } - /// Starts at 0. Measures the byte offset since the start of the input. - pub fn getByteOffset(self: *const @This()) u64 { - return self.total_bytes_before_current_input + self.cursor_pointer.*; - } -}; - -/// See the documentation for `std.json.Token`. 
-pub const AllocWhen = enum { alloc_if_needed, alloc_always }; - -/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default. -/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`. -pub const default_max_value_len = 4 * 1024 * 1024; - -/// Connects a `std.io.GenericReader` to a `std.json.Scanner`. -/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader. -pub fn Reader(comptime buffer_size: usize, comptime ReaderType: type) type { - return struct { - scanner: Scanner, - reader: ReaderType, - - buffer: [buffer_size]u8 = undefined, - - /// The allocator is only used to track `[]` and `{}` nesting levels. - pub fn init(allocator: Allocator, io_reader: ReaderType) @This() { - return .{ - .scanner = Scanner.initStreaming(allocator), - .reader = io_reader, - }; - } - pub fn deinit(self: *@This()) void { - self.scanner.deinit(); - self.* = undefined; - } - - /// Calls `std.json.Scanner.enableDiagnostics`. - pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { - self.scanner.enableDiagnostics(diagnostics); - } - - pub const NextError = ReaderType.Error || Error || Allocator.Error; - pub const SkipError = NextError; - pub const AllocError = NextError || error{ValueTooLong}; - pub const PeekError = ReaderType.Error || Error; - - /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. - pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token { - return self.nextAllocMax(allocator, when, default_max_value_len); - } - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. 
- pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token { - const token_type = try self.peekNextTokenType(); - switch (token_type) { - .number, .string => { - var value_list = ArrayList(u8).init(allocator); - errdefer { - value_list.deinit(); - } - if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| { - return if (token_type == .number) - Token{ .number = slice } - else - Token{ .string = slice }; - } else { - return if (token_type == .number) - Token{ .allocated_number = try value_list.toOwnedSlice() } - else - Token{ .allocated_string = try value_list.toOwnedSlice() }; - } - }, - - // Simple tokens never alloc. - .object_begin, - .object_end, - .array_begin, - .array_end, - .true, - .false, - .null, - .end_of_document, - => return try self.next(), - } - } - - /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` - pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocError!?[]const u8 { - return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); - } - /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`. - pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocError!?[]const u8 { - while (true) { - return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`. 
- pub fn skipValue(self: *@This()) SkipError!void { - switch (try self.peekNextTokenType()) { - .object_begin, .array_begin => { - try self.skipUntilStackHeight(self.stackHeight()); - }, - .number, .string => { - while (true) { - switch (try self.next()) { - .partial_number, - .partial_string, - .partial_string_escaped_1, - .partial_string_escaped_2, - .partial_string_escaped_3, - .partial_string_escaped_4, - => continue, - - .number, .string => break, - - else => unreachable, - } - } - }, - .true, .false, .null => { - _ = try self.next(); - }, - - .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. - } - } - /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`. - pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void { - while (true) { - return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - /// Calls `std.json.Scanner.stackHeight`. - pub fn stackHeight(self: *const @This()) usize { - return self.scanner.stackHeight(); - } - /// Calls `std.json.Scanner.ensureTotalStackCapacity`. - pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void { - try self.scanner.ensureTotalStackCapacity(height); - } - - /// See `std.json.Token` for documentation of this function. - pub fn next(self: *@This()) NextError!Token { - while (true) { - return self.scanner.next() catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - /// See `std.json.Scanner.peekNextTokenType()`. 
- pub fn peekNextTokenType(self: *@This()) PeekError!TokenType { - while (true) { - return self.scanner.peekNextTokenType() catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - fn refillBuffer(self: *@This()) ReaderType.Error!void { - const input = self.buffer[0..try self.reader.read(self.buffer[0..])]; - if (input.len > 0) { - self.scanner.feedInput(input); - } else { - self.scanner.endInput(); - } - } - }; -} - -/// The lowest level parsing API in this package; -/// supports streaming input with a low memory footprint. -/// The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input. -/// Specifically `d/8` bytes are required for this purpose, -/// with some extra buffer according to the implementation of `std.ArrayList`. -/// -/// This scanner can emit partial tokens; see `std.json.Token`. -/// The input to this class is a sequence of input buffers that you must supply one at a time. -/// Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned. -/// Then call `feedInput()` again and so forth. -/// Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`, -/// or when `error.BufferUnderrun` requests more data and there is no more. -/// Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned. -pub const Scanner = struct { - state: State = .value, - string_is_object_key: bool = false, - stack: BitStack, - value_start: usize = undefined, - utf16_code_units: [2]u16 = undefined, - - input: []const u8 = "", - cursor: usize = 0, - is_end_of_input: bool = false, - diagnostics: ?*Diagnostics = null, - - /// The allocator is only used to track `[]` and `{}` nesting levels. 
- pub fn initStreaming(allocator: Allocator) @This() { - return .{ - .stack = BitStack.init(allocator), - }; - } - /// Use this if your input is a single slice. - /// This is effectively equivalent to: - /// ``` - /// initStreaming(allocator); - /// feedInput(complete_input); - /// endInput(); - /// ``` - pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() { - return .{ - .stack = BitStack.init(allocator), - .input = complete_input, - .is_end_of_input = true, - }; - } - pub fn deinit(self: *@This()) void { - self.stack.deinit(); - self.* = undefined; - } - - pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { - diagnostics.cursor_pointer = &self.cursor; - self.diagnostics = diagnostics; - } - - /// Call this whenever you get `error.BufferUnderrun` from `next()`. - /// When there is no more input to provide, call `endInput()`. - pub fn feedInput(self: *@This(), input: []const u8) void { - assert(self.cursor == self.input.len); // Not done with the last input slice. - if (self.diagnostics) |diag| { - diag.total_bytes_before_current_input += self.input.len; - // This usually goes "negative" to measure how far before the beginning - // of the new buffer the current line started. - diag.line_start_cursor -%= self.cursor; - } - self.input = input; - self.cursor = 0; - self.value_start = 0; - } - /// Call this when you will no longer call `feedInput()` anymore. - /// This can be called either immediately after the last `feedInput()`, - /// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`. - /// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`. 
- pub fn endInput(self: *@This()) void { - self.is_end_of_input = true; - } - - pub const NextError = Error || Allocator.Error || error{BufferUnderrun}; - pub const AllocError = Error || Allocator.Error || error{ValueTooLong}; - pub const PeekError = Error || error{BufferUnderrun}; - pub const SkipError = Error || Allocator.Error; - pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun}; - - /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` - /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. - pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token { - return self.nextAllocMax(allocator, when, default_max_value_len); - } - - /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. - pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token { - assert(self.is_end_of_input); // This function is not available in streaming mode. 
- const token_type = self.peekNextTokenType() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }; - switch (token_type) { - .number, .string => { - var value_list = ArrayList(u8).init(allocator); - errdefer { - value_list.deinit(); - } - if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }) |slice| { - return if (token_type == .number) - Token{ .number = slice } - else - Token{ .string = slice }; - } else { - return if (token_type == .number) - Token{ .allocated_number = try value_list.toOwnedSlice() } - else - Token{ .allocated_string = try value_list.toOwnedSlice() }; - } - }, - - // Simple tokens never alloc. - .object_begin, - .object_end, - .array_begin, - .array_end, - .true, - .false, - .null, - .end_of_document, - => return self.next() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }, - } - } - - /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` - pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 { - return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); - } - /// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`. - /// When allocation is not necessary with `.alloc_if_needed`, - /// this method returns the content slice from the input buffer, and `value_list` is not touched. - /// When allocation is necessary or with `.alloc_always`, this method concatenates partial tokens into the given `value_list`, - /// and returns `null` once the final `.number` or `.string` token has been written into it. - /// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list. 
- /// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation - /// can be resumed by passing the same array list in again. - /// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type; - /// the caller of this method is expected to know which type of token is being processed. - pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 { - while (true) { - const token = try self.next(); - switch (token) { - // Accumulate partial values. - .partial_number, .partial_string => |slice| { - try appendSlice(value_list, slice, max_value_len); - }, - .partial_string_escaped_1 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - .partial_string_escaped_2 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - .partial_string_escaped_3 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - .partial_string_escaped_4 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - - // Return complete values. - .number => |slice| { - if (when == .alloc_if_needed and value_list.items.len == 0) { - // No alloc necessary. - return slice; - } - try appendSlice(value_list, slice, max_value_len); - // The token is complete. - return null; - }, - .string => |slice| { - if (when == .alloc_if_needed and value_list.items.len == 0) { - // No alloc necessary. - return slice; - } - try appendSlice(value_list, slice, max_value_len); - // The token is complete. - return null; - }, - - .object_begin, - .object_end, - .array_begin, - .array_end, - .true, - .false, - .null, - .end_of_document, - => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this. 
- - .allocated_number, .allocated_string => unreachable, - } - } - } - - /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. - /// If the next token type is `.object_begin` or `.array_begin`, - /// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found. - /// If the next token type is `.number` or `.string`, - /// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found. - /// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once. - /// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`; - /// see `peekNextTokenType()`. - pub fn skipValue(self: *@This()) SkipError!void { - assert(self.is_end_of_input); // This function is not available in streaming mode. - switch (self.peekNextTokenType() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }) { - .object_begin, .array_begin => { - self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }; - }, - .number, .string => { - while (true) { - switch (self.next() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }) { - .partial_number, - .partial_string, - .partial_string_escaped_1, - .partial_string_escaped_2, - .partial_string_escaped_3, - .partial_string_escaped_4, - => continue, - - .number, .string => break, - - else => unreachable, - } - } - }, - .true, .false, .null => { - _ = self.next() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }; - }, - - .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. - } - } - - /// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal the given stack height. 
- /// Unlike `skipValue()`, this function is available in streaming mode. - pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void { - while (true) { - switch (try self.next()) { - .object_end, .array_end => { - if (self.stackHeight() == terminal_stack_height) break; - }, - .end_of_document => unreachable, - else => continue, - } - } - } - - /// The depth of `{}` or `[]` nesting levels at the current position. - pub fn stackHeight(self: *const @This()) usize { - return self.stack.bit_len; - } - - /// Pre allocate memory to hold the given number of nesting levels. - /// `stackHeight()` up to the given number will not cause allocations. - pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void { - try self.stack.ensureTotalCapacity(height); - } - - /// See `std.json.Token` for documentation of this function. - pub fn next(self: *@This()) NextError!Token { - state_loop: while (true) { - switch (self.state) { - .value => { - switch (try self.skipWhitespaceExpectByte()) { - // Object, Array - '{' => { - try self.stack.push(OBJECT_MODE); - self.cursor += 1; - self.state = .object_start; - return .object_begin; - }, - '[' => { - try self.stack.push(ARRAY_MODE); - self.cursor += 1; - self.state = .array_start; - return .array_begin; - }, - - // String - '"' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - continue :state_loop; - }, - - // Number - '1'...'9' => { - self.value_start = self.cursor; - self.cursor += 1; - self.state = .number_int; - continue :state_loop; - }, - '0' => { - self.value_start = self.cursor; - self.cursor += 1; - self.state = .number_leading_zero; - continue :state_loop; - }, - '-' => { - self.value_start = self.cursor; - self.cursor += 1; - self.state = .number_minus; - continue :state_loop; - }, - - // literal values - 't' => { - self.cursor += 1; - self.state = .literal_t; - continue :state_loop; - }, - 'f' => { - self.cursor += 1; - self.state = 
.literal_f; - continue :state_loop; - }, - 'n' => { - self.cursor += 1; - self.state = .literal_n; - continue :state_loop; - }, - - else => return error.SyntaxError, - } - }, - - .post_value => { - if (try self.skipWhitespaceCheckEnd()) return .end_of_document; - - const c = self.input[self.cursor]; - if (self.string_is_object_key) { - self.string_is_object_key = false; - switch (c) { - ':' => { - self.cursor += 1; - self.state = .value; - continue :state_loop; - }, - else => return error.SyntaxError, - } - } - - switch (c) { - '}' => { - if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError; - self.cursor += 1; - // stay in .post_value state. - return .object_end; - }, - ']' => { - if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError; - self.cursor += 1; - // stay in .post_value state. - return .array_end; - }, - ',' => { - switch (self.stack.peek()) { - OBJECT_MODE => { - self.state = .object_post_comma; - }, - ARRAY_MODE => { - self.state = .value; - }, - } - self.cursor += 1; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - - .object_start => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - self.string_is_object_key = true; - continue :state_loop; - }, - '}' => { - self.cursor += 1; - _ = self.stack.pop(); - self.state = .post_value; - return .object_end; - }, - else => return error.SyntaxError, - } - }, - .object_post_comma => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - self.string_is_object_key = true; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - - .array_start => { - switch (try self.skipWhitespaceExpectByte()) { - ']' => { - self.cursor += 1; - _ = self.stack.pop(); - self.state = .post_value; - return .array_end; - }, - else => { - self.state = .value; - continue :state_loop; - }, - } - }, - - 
.number_minus => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0' => { - self.cursor += 1; - self.state = .number_leading_zero; - continue :state_loop; - }, - '1'...'9' => { - self.cursor += 1; - self.state = .number_int; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_leading_zero => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true); - switch (self.input[self.cursor]) { - '.' => { - self.cursor += 1; - self.state = .number_post_dot; - continue :state_loop; - }, - 'e', 'E' => { - self.cursor += 1; - self.state = .number_post_e; - continue :state_loop; - }, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - }, - .number_int => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - '0'...'9' => continue, - '.' => { - self.cursor += 1; - self.state = .number_post_dot; - continue :state_loop; - }, - 'e', 'E' => { - self.cursor += 1; - self.state = .number_post_e; - continue :state_loop; - }, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - } - return self.endOfBufferInNumber(true); - }, - .number_post_dot => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0'...'9' => { - self.cursor += 1; - self.state = .number_frac; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_frac => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - '0'...'9' => continue, - 'e', 'E' => { - self.cursor += 1; - self.state = .number_post_e; - continue :state_loop; - }, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - } - return self.endOfBufferInNumber(true); - }, - .number_post_e => { - if (self.cursor >= 
self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0'...'9' => { - self.cursor += 1; - self.state = .number_exp; - continue :state_loop; - }, - '+', '-' => { - self.cursor += 1; - self.state = .number_post_e_sign; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_post_e_sign => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0'...'9' => { - self.cursor += 1; - self.state = .number_exp; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_exp => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - '0'...'9' => continue, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - } - return self.endOfBufferInNumber(true); - }, - - .string => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string. - - // ASCII plain text. - 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue, - - // Special characters. - '"' => { - const result = Token{ .string = self.takeValueSlice() }; - self.cursor += 1; - self.state = .post_value; - return result; - }, - '\\' => { - const slice = self.takeValueSlice(); - self.cursor += 1; - self.state = .string_backslash; - if (slice.len > 0) return Token{ .partial_string = slice }; - continue :state_loop; - }, - - // UTF-8 validation. 
- // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String - 0xC2...0xDF => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - 0xE0 => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte_guard_against_overlong; - continue :state_loop; - }, - 0xE1...0xEC, 0xEE...0xEF => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - 0xED => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half; - continue :state_loop; - }, - 0xF0 => { - self.cursor += 1; - self.state = .string_utf8_third_to_last_byte_guard_against_overlong; - continue :state_loop; - }, - 0xF1...0xF3 => { - self.cursor += 1; - self.state = .string_utf8_third_to_last_byte; - continue :state_loop; - }, - 0xF4 => { - self.cursor += 1; - self.state = .string_utf8_third_to_last_byte_guard_against_too_large; - continue :state_loop; - }, - 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8. - } - } - if (self.is_end_of_input) return error.UnexpectedEndOfInput; - const slice = self.takeValueSlice(); - if (slice.len > 0) return Token{ .partial_string = slice }; - return error.BufferUnderrun; - }, - .string_backslash => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - '"', '\\', '/' => { - // Since these characters now represent themselves literally, - // we can simply begin the next plaintext slice here. 
- self.value_start = self.cursor; - self.cursor += 1; - self.state = .string; - continue :state_loop; - }, - 'b' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{0x08} }; - }, - 'f' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{0x0c} }; - }, - 'n' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{'\n'} }; - }, - 'r' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{'\r'} }; - }, - 't' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{'\t'} }; - }, - 'u' => { - self.cursor += 1; - self.state = .string_backslash_u; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .string_backslash_u => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] = @as(u16, c - '0') << 12; - }, - 'A'...'F' => { - self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12; - }, - 'a'...'f' => { - self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.state = .string_backslash_u_1; - continue :state_loop; - }, - .string_backslash_u_1 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] |= @as(u16, c - '0') << 8; - }, - 'A'...'F' => { - self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8; - }, - 'a'...'f' => { - self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.state = 
.string_backslash_u_2; - continue :state_loop; - }, - .string_backslash_u_2 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] |= @as(u16, c - '0') << 4; - }, - 'A'...'F' => { - self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4; - }, - 'a'...'f' => { - self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.state = .string_backslash_u_3; - continue :state_loop; - }, - .string_backslash_u_3 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] |= c - '0'; - }, - 'A'...'F' => { - self.utf16_code_units[0] |= c - 'A' + 10; - }, - 'a'...'f' => { - self.utf16_code_units[0] |= c - 'a' + 10; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) { - self.state = .string_surrogate_half; - continue :state_loop; - } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) { - return error.SyntaxError; // Unexpected low surrogate half. - } else { - self.value_start = self.cursor; - self.state = .string; - return partialStringCodepoint(self.utf16_code_units[0]); - } - }, - .string_surrogate_half => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - '\\' => { - self.cursor += 1; - self.state = .string_surrogate_half_backslash; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. 
- } - }, - .string_surrogate_half_backslash => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 'u' => { - self.cursor += 1; - self.state = .string_surrogate_half_backslash_u; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. - } - }, - .string_surrogate_half_backslash_u => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 'D', 'd' => { - self.cursor += 1; - self.utf16_code_units[1] = 0xD << 12; - self.state = .string_surrogate_half_backslash_u_1; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. - } - }, - .string_surrogate_half_backslash_u_1 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - 'C'...'F' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8; - self.state = .string_surrogate_half_backslash_u_2; - continue :state_loop; - }, - 'c'...'f' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8; - self.state = .string_surrogate_half_backslash_u_2; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. 
- } - }, - .string_surrogate_half_backslash_u_2 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - '0') << 4; - self.state = .string_surrogate_half_backslash_u_3; - continue :state_loop; - }, - 'A'...'F' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4; - self.state = .string_surrogate_half_backslash_u_3; - continue :state_loop; - }, - 'a'...'f' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4; - self.state = .string_surrogate_half_backslash_u_3; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .string_surrogate_half_backslash_u_3 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[1] |= c - '0'; - }, - 'A'...'F' => { - self.utf16_code_units[1] |= c - 'A' + 10; - }, - 'a'...'f' => { - self.utf16_code_units[1] |= c - 'a' + 10; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable; - return partialStringCodepoint(code_point); - }, - - .string_utf8_last_byte => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0xBF => { - self.cursor += 1; - self.state = .string; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_second_to_last_byte => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0xBF => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. 
- } - }, - .string_utf8_second_to_last_byte_guard_against_overlong => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0xA0...0xBF => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_second_to_last_byte_guard_against_surrogate_half => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0x9F => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_third_to_last_byte => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0xBF => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_third_to_last_byte_guard_against_overlong => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x90...0xBF => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_third_to_last_byte_guard_against_too_large => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0x8F => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. 
- } - }, - - .literal_t => { - switch (try self.expectByte()) { - 'r' => { - self.cursor += 1; - self.state = .literal_tr; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_tr => { - switch (try self.expectByte()) { - 'u' => { - self.cursor += 1; - self.state = .literal_tru; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_tru => { - switch (try self.expectByte()) { - 'e' => { - self.cursor += 1; - self.state = .post_value; - return .true; - }, - else => return error.SyntaxError, - } - }, - .literal_f => { - switch (try self.expectByte()) { - 'a' => { - self.cursor += 1; - self.state = .literal_fa; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_fa => { - switch (try self.expectByte()) { - 'l' => { - self.cursor += 1; - self.state = .literal_fal; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_fal => { - switch (try self.expectByte()) { - 's' => { - self.cursor += 1; - self.state = .literal_fals; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_fals => { - switch (try self.expectByte()) { - 'e' => { - self.cursor += 1; - self.state = .post_value; - return .false; - }, - else => return error.SyntaxError, - } - }, - .literal_n => { - switch (try self.expectByte()) { - 'u' => { - self.cursor += 1; - self.state = .literal_nu; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_nu => { - switch (try self.expectByte()) { - 'l' => { - self.cursor += 1; - self.state = .literal_nul; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_nul => { - switch (try self.expectByte()) { - 'l' => { - self.cursor += 1; - self.state = .post_value; - return .null; - }, - else => return error.SyntaxError, - } - }, - } - unreachable; - } - } - - /// Seeks ahead in the input until the first byte of the next token (or the end of the input) - /// 
determines which type of token will be returned from the next `next*()` call. - /// This function is idempotent, only advancing past commas, colons, and inter-token whitespace. - pub fn peekNextTokenType(self: *@This()) PeekError!TokenType { - state_loop: while (true) { - switch (self.state) { - .value => { - switch (try self.skipWhitespaceExpectByte()) { - '{' => return .object_begin, - '[' => return .array_begin, - '"' => return .string, - '-', '0'...'9' => return .number, - 't' => return .true, - 'f' => return .false, - 'n' => return .null, - else => return error.SyntaxError, - } - }, - - .post_value => { - if (try self.skipWhitespaceCheckEnd()) return .end_of_document; - - const c = self.input[self.cursor]; - if (self.string_is_object_key) { - self.string_is_object_key = false; - switch (c) { - ':' => { - self.cursor += 1; - self.state = .value; - continue :state_loop; - }, - else => return error.SyntaxError, - } - } - - switch (c) { - '}' => return .object_end, - ']' => return .array_end, - ',' => { - switch (self.stack.peek()) { - OBJECT_MODE => { - self.state = .object_post_comma; - }, - ARRAY_MODE => { - self.state = .value; - }, - } - self.cursor += 1; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - - .object_start => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => return .string, - '}' => return .object_end, - else => return error.SyntaxError, - } - }, - .object_post_comma => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => return .string, - else => return error.SyntaxError, - } - }, - - .array_start => { - switch (try self.skipWhitespaceExpectByte()) { - ']' => return .array_end, - else => { - self.state = .value; - continue :state_loop; - }, - } - }, - - .number_minus, - .number_leading_zero, - .number_int, - .number_post_dot, - .number_frac, - .number_post_e, - .number_post_e_sign, - .number_exp, - => return .number, - - .string, - .string_backslash, - .string_backslash_u, - .string_backslash_u_1, - 
.string_backslash_u_2, - .string_backslash_u_3, - .string_surrogate_half, - .string_surrogate_half_backslash, - .string_surrogate_half_backslash_u, - .string_surrogate_half_backslash_u_1, - .string_surrogate_half_backslash_u_2, - .string_surrogate_half_backslash_u_3, - => return .string, - - .string_utf8_last_byte, - .string_utf8_second_to_last_byte, - .string_utf8_second_to_last_byte_guard_against_overlong, - .string_utf8_second_to_last_byte_guard_against_surrogate_half, - .string_utf8_third_to_last_byte, - .string_utf8_third_to_last_byte_guard_against_overlong, - .string_utf8_third_to_last_byte_guard_against_too_large, - => return .string, - - .literal_t, - .literal_tr, - .literal_tru, - => return .true, - .literal_f, - .literal_fa, - .literal_fal, - .literal_fals, - => return .false, - .literal_n, - .literal_nu, - .literal_nul, - => return .null, - } - unreachable; - } - } - - const State = enum { - value, - post_value, - - object_start, - object_post_comma, - - array_start, - - number_minus, - number_leading_zero, - number_int, - number_post_dot, - number_frac, - number_post_e, - number_post_e_sign, - number_exp, - - string, - string_backslash, - string_backslash_u, - string_backslash_u_1, - string_backslash_u_2, - string_backslash_u_3, - string_surrogate_half, - string_surrogate_half_backslash, - string_surrogate_half_backslash_u, - string_surrogate_half_backslash_u_1, - string_surrogate_half_backslash_u_2, - string_surrogate_half_backslash_u_3, - - // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String - string_utf8_last_byte, // State A - string_utf8_second_to_last_byte, // State B - string_utf8_second_to_last_byte_guard_against_overlong, // State C - string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D - string_utf8_third_to_last_byte, // State E - string_utf8_third_to_last_byte_guard_against_overlong, // State F - string_utf8_third_to_last_byte_guard_against_too_large, // State G - 
- literal_t, - literal_tr, - literal_tru, - literal_f, - literal_fa, - literal_fal, - literal_fals, - literal_n, - literal_nu, - literal_nul, - }; - - fn expectByte(self: *const @This()) !u8 { - if (self.cursor < self.input.len) { - return self.input[self.cursor]; - } - // No byte. - if (self.is_end_of_input) return error.UnexpectedEndOfInput; - return error.BufferUnderrun; - } - - fn skipWhitespace(self: *@This()) void { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - // Whitespace - ' ', '\t', '\r' => continue, - '\n' => { - if (self.diagnostics) |diag| { - diag.line_number += 1; - // This will count the newline itself, - // which means a straight-forward subtraction will give a 1-based column number. - diag.line_start_cursor = self.cursor; - } - continue; - }, - else => return, - } - } - } - - fn skipWhitespaceExpectByte(self: *@This()) !u8 { - self.skipWhitespace(); - return self.expectByte(); - } - - fn skipWhitespaceCheckEnd(self: *@This()) !bool { - self.skipWhitespace(); - if (self.cursor >= self.input.len) { - // End of buffer. - if (self.is_end_of_input) { - // End of everything. - if (self.stackHeight() == 0) { - // We did it! - return true; - } - return error.UnexpectedEndOfInput; - } - return error.BufferUnderrun; - } - if (self.stackHeight() == 0) return error.SyntaxError; - return false; - } - - fn takeValueSlice(self: *@This()) []const u8 { - const slice = self.input[self.value_start..self.cursor]; - self.value_start = self.cursor; - return slice; - } - fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 { - // Check if the escape sequence started before the current input buffer. - // (The algebra here is awkward to avoid unsigned underflow, - // but it's just making sure the slice on the next line isn't UB.) - if (self.cursor <= self.value_start + trailing_negative_offset) return ""; - const slice = self.input[self.value_start .. 
self.cursor - trailing_negative_offset]; - // When trailing_negative_offset is non-zero, setting self.value_start doesn't matter, - // because we always set it again while emitting the .partial_string_escaped_*. - self.value_start = self.cursor; - return slice; - } - - fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token { - const slice = self.takeValueSlice(); - if (self.is_end_of_input) { - if (!allow_end) return error.UnexpectedEndOfInput; - self.state = .post_value; - return Token{ .number = slice }; - } - if (slice.len == 0) return error.BufferUnderrun; - return Token{ .partial_number = slice }; - } - - fn endOfBufferInString(self: *@This()) !Token { - if (self.is_end_of_input) return error.UnexpectedEndOfInput; - const slice = self.takeValueSliceMinusTrailingOffset(switch (self.state) { - // Don't include the escape sequence in the partial string. - .string_backslash => 1, - .string_backslash_u => 2, - .string_backslash_u_1 => 3, - .string_backslash_u_2 => 4, - .string_backslash_u_3 => 5, - .string_surrogate_half => 6, - .string_surrogate_half_backslash => 7, - .string_surrogate_half_backslash_u => 8, - .string_surrogate_half_backslash_u_1 => 9, - .string_surrogate_half_backslash_u_2 => 10, - .string_surrogate_half_backslash_u_3 => 11, - - // Include everything up to the cursor otherwise. 
- .string, - .string_utf8_last_byte, - .string_utf8_second_to_last_byte, - .string_utf8_second_to_last_byte_guard_against_overlong, - .string_utf8_second_to_last_byte_guard_against_surrogate_half, - .string_utf8_third_to_last_byte, - .string_utf8_third_to_last_byte_guard_against_overlong, - .string_utf8_third_to_last_byte_guard_against_too_large, - => 0, - - else => unreachable, - }); - if (slice.len == 0) return error.BufferUnderrun; - return Token{ .partial_string = slice }; - } - - fn partialStringCodepoint(code_point: u21) Token { - var buf: [4]u8 = undefined; - switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) { - 1 => return Token{ .partial_string_escaped_1 = buf[0..1].* }, - 2 => return Token{ .partial_string_escaped_2 = buf[0..2].* }, - 3 => return Token{ .partial_string_escaped_3 = buf[0..3].* }, - 4 => return Token{ .partial_string_escaped_4 = buf[0..4].* }, - else => unreachable, - } - } -}; - -const OBJECT_MODE = 0; -const ARRAY_MODE = 1; - -fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void { - const new_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong; - if (new_len > max_value_len) return error.ValueTooLong; - try list.appendSlice(buf); -} - -/// For the slice you get from a `Token.number` or `Token.allocated_number`, -/// this function returns true if the number doesn't contain any fraction or exponent components, and is not `-0`. -/// Note, the numeric value encoded by the value may still be an integer, such as `1.0`. -/// This function is meant to give a hint about whether integer parsing or float parsing should be used on the value. -/// This function will not give meaningful results on non-numeric input. 
-pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool { - if (std.mem.eql(u8, value, "-0")) return false; - return std.mem.indexOfAny(u8, value, ".eE") == null; -} - -test { - _ = @import("./scanner_test.zig"); -} diff --git a/lib/std/json/scanner_test.zig b/lib/std/json/scanner_test.zig index d085cb661a..eb5d5cb75e 100644 --- a/lib/std/json/scanner_test.zig +++ b/lib/std/json/scanner_test.zig @@ -1,13 +1,11 @@ const std = @import("std"); -const JsonScanner = @import("./scanner.zig").Scanner; -const jsonReader = @import("./scanner.zig").reader; -const JsonReader = @import("./scanner.zig").Reader; -const Token = @import("./scanner.zig").Token; -const TokenType = @import("./scanner.zig").TokenType; -const Diagnostics = @import("./scanner.zig").Diagnostics; -const Error = @import("./scanner.zig").Error; -const validate = @import("./scanner.zig").validate; -const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; +const Scanner = @import("Scanner.zig"); +const Token = Scanner.Token; +const TokenType = Scanner.TokenType; +const Diagnostics = Scanner.Diagnostics; +const Error = Scanner.Error; +const validate = Scanner.validate; +const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger; const example_document_str = \\{ @@ -36,7 +34,7 @@ fn expectPeekNext(scanner_or_reader: anytype, expected_token_type: TokenType, ex } test "token" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str); + var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str); defer scanner.deinit(); try expectNext(&scanner, .object_begin); @@ -138,23 +136,25 @@ fn testAllTypes(source: anytype, large_buffer: bool) !void { } test "peek all types" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, all_types_test_case); + var scanner = Scanner.initCompleteInput(std.testing.allocator, all_types_test_case); defer scanner.deinit(); try testAllTypes(&scanner, 
true); - var stream = std.io.fixedBufferStream(all_types_test_case); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(all_types_test_case); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); try testAllTypes(&json_reader, true); - var tiny_stream = std.io.fixedBufferStream(all_types_test_case); - var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + var tiny_buffer: [1]u8 = undefined; + var tiny_stream: std.testing.Reader = .init(&tiny_buffer, &.{.{ .buffer = all_types_test_case }}); + tiny_stream.artificial_limit = .limited(1); + var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream.interface); defer tiny_json_reader.deinit(); try testAllTypes(&tiny_json_reader, false); } test "token mismatched close" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }"); + var scanner = Scanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }"); defer scanner.deinit(); try expectNext(&scanner, .array_begin); try expectNext(&scanner, Token{ .number = "102" }); @@ -164,15 +164,15 @@ test "token mismatched close" { } test "token premature object close" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "{ \"key\": }"); + var scanner = Scanner.initCompleteInput(std.testing.allocator, "{ \"key\": }"); defer scanner.deinit(); try expectNext(&scanner, .object_begin); try expectNext(&scanner, Token{ .string = "key" }); try std.testing.expectError(error.SyntaxError, scanner.next()); } -test "JsonScanner basic" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str); +test "Scanner basic" { + var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str); defer scanner.deinit(); while (true) { @@ -181,10 +181,10 @@ test "JsonScanner basic" { } } -test "JsonReader 
basic" { - var stream = std.io.fixedBufferStream(example_document_str); +test "Scanner.Reader basic" { + var stream: std.Io.Reader = .fixed(example_document_str); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); while (true) { @@ -215,7 +215,7 @@ const number_test_items = blk: { test "numbers" { for (number_test_items) |number_str| { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, number_str); + var scanner = Scanner.initCompleteInput(std.testing.allocator, number_str); defer scanner.deinit(); const token = try scanner.next(); @@ -243,10 +243,10 @@ const string_test_cases = .{ test "strings" { inline for (string_test_cases) |tuple| { - var stream = std.io.fixedBufferStream("\"" ++ tuple[0] ++ "\""); + var stream: std.Io.Reader = .fixed("\"" ++ tuple[0] ++ "\""); var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); const token = try json_reader.nextAlloc(arena.allocator(), .alloc_if_needed); @@ -289,7 +289,7 @@ test "nesting" { } fn expectMaybeError(document_str: []const u8, maybe_error: ?Error) !void { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, document_str); + var scanner = Scanner.initCompleteInput(std.testing.allocator, document_str); defer scanner.deinit(); while (true) { @@ -352,12 +352,12 @@ fn expectEqualTokens(expected_token: Token, actual_token: Token) !void { } fn testTinyBufferSize(document_str: []const u8) !void { - var tiny_stream = std.io.fixedBufferStream(document_str); - var normal_stream = std.io.fixedBufferStream(document_str); + var tiny_stream: std.Io.Reader = .fixed(document_str); + var normal_stream: std.Io.Reader = .fixed(document_str); - var tiny_json_reader = 
JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream); defer tiny_json_reader.deinit(); - var normal_json_reader = JsonReader(0x1000, @TypeOf(normal_stream.reader())).init(std.testing.allocator, normal_stream.reader()); + var normal_json_reader: Scanner.Reader = .init(std.testing.allocator, &normal_stream); defer normal_json_reader.deinit(); expectEqualStreamOfTokens(&normal_json_reader, &tiny_json_reader) catch |err| { @@ -397,13 +397,13 @@ test "validate" { } fn testSkipValue(s: []const u8) !void { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s); + var scanner = Scanner.initCompleteInput(std.testing.allocator, s); defer scanner.deinit(); try scanner.skipValue(); try expectEqualTokens(.end_of_document, try scanner.next()); - var stream = std.io.fixedBufferStream(s); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(s); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); try json_reader.skipValue(); try expectEqualTokens(.end_of_document, try json_reader.next()); @@ -441,7 +441,7 @@ fn testEnsureStackCapacity(do_ensure: bool) !void { try input_string.appendNTimes(std.testing.allocator, ']', nestings); defer input_string.deinit(std.testing.allocator); - var scanner = JsonScanner.initCompleteInput(failing_allocator, input_string.items); + var scanner = Scanner.initCompleteInput(failing_allocator, input_string.items); defer scanner.deinit(); if (do_ensure) { @@ -473,17 +473,17 @@ fn testDiagnosticsFromSource(expected_error: ?anyerror, line: u64, col: u64, byt try std.testing.expectEqual(byte_offset, diagnostics.getByteOffset()); } fn testDiagnostics(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, s: []const u8) !void { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s); + 
var scanner = Scanner.initCompleteInput(std.testing.allocator, s); defer scanner.deinit(); try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &scanner); - var tiny_stream = std.io.fixedBufferStream(s); - var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + var tiny_stream: std.Io.Reader = .fixed(s); + var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream); defer tiny_json_reader.deinit(); try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &tiny_json_reader); - var medium_stream = std.io.fixedBufferStream(s); - var medium_json_reader = JsonReader(5, @TypeOf(medium_stream.reader())).init(std.testing.allocator, medium_stream.reader()); + var medium_stream: std.Io.Reader = .fixed(s); + var medium_json_reader: Scanner.Reader = .init(std.testing.allocator, &medium_stream); defer medium_json_reader.deinit(); try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &medium_json_reader); } diff --git a/lib/std/json/static.zig b/lib/std/json/static.zig index 2504d59100..44469adf4c 100644 --- a/lib/std/json/static.zig +++ b/lib/std/json/static.zig @@ -4,11 +4,11 @@ const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; const ArrayList = std.ArrayList; -const Scanner = @import("./scanner.zig").Scanner; -const Token = @import("./scanner.zig").Token; -const AllocWhen = @import("./scanner.zig").AllocWhen; -const default_max_value_len = @import("./scanner.zig").default_max_value_len; -const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; +const Scanner = @import("Scanner.zig"); +const Token = Scanner.Token; +const AllocWhen = Scanner.AllocWhen; +const default_max_value_len = Scanner.default_max_value_len; +const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger; const Value = @import("./dynamic.zig").Value; const Array = 
@import("./dynamic.zig").Array; diff --git a/lib/std/json/static_test.zig b/lib/std/json/static_test.zig index 3a1919e40c..735ccd82e1 100644 --- a/lib/std/json/static_test.zig +++ b/lib/std/json/static_test.zig @@ -12,9 +12,7 @@ const parseFromValue = @import("./static.zig").parseFromValue; const parseFromValueLeaky = @import("./static.zig").parseFromValueLeaky; const ParseOptions = @import("./static.zig").ParseOptions; -const JsonScanner = @import("./scanner.zig").Scanner; -const jsonReader = @import("./scanner.zig").reader; -const Diagnostics = @import("./scanner.zig").Diagnostics; +const Scanner = @import("Scanner.zig"); const Value = @import("./dynamic.zig").Value; @@ -300,9 +298,9 @@ const subnamespaces_0_doc = fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void { // First do the one with the debug info in case we get a SyntaxError or something. { - var scanner = JsonScanner.initCompleteInput(testing.allocator, doc); + var scanner = Scanner.initCompleteInput(testing.allocator, doc); defer scanner.deinit(); - var diagnostics = Diagnostics{}; + var diagnostics = Scanner.Diagnostics{}; scanner.enableDiagnostics(&diagnostics); var parsed = parseFromTokenSource(T, testing.allocator, &scanner, .{}) catch |e| { std.debug.print("at line,col: {}:{}\n", .{ diagnostics.getLine(), diagnostics.getColumn() }); @@ -317,8 +315,8 @@ fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void { try testing.expectEqualDeep(expected, parsed.value); } { - var stream = std.io.fixedBufferStream(doc); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(doc); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); var parsed = try parseFromTokenSource(T, testing.allocator, &json_reader, .{}); defer parsed.deinit(); @@ -331,13 +329,13 @@ fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void { try 
testing.expectEqualDeep(expected, try parseFromSliceLeaky(T, arena.allocator(), doc, .{})); } { - var scanner = JsonScanner.initCompleteInput(testing.allocator, doc); + var scanner = Scanner.initCompleteInput(testing.allocator, doc); defer scanner.deinit(); try testing.expectEqualDeep(expected, try parseFromTokenSourceLeaky(T, arena.allocator(), &scanner, .{})); } { - var stream = std.io.fixedBufferStream(doc); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(doc); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); try testing.expectEqualDeep(expected, try parseFromTokenSourceLeaky(T, arena.allocator(), &json_reader, .{})); } @@ -763,7 +761,7 @@ test "parse exponential into int" { test "parseFromTokenSource" { { - var scanner = JsonScanner.initCompleteInput(testing.allocator, "123"); + var scanner = Scanner.initCompleteInput(testing.allocator, "123"); defer scanner.deinit(); var parsed = try parseFromTokenSource(u32, testing.allocator, &scanner, .{}); defer parsed.deinit(); @@ -771,8 +769,8 @@ test "parseFromTokenSource" { } { - var stream = std.io.fixedBufferStream("123"); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed("123"); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); var parsed = try parseFromTokenSource(u32, testing.allocator, &json_reader, .{}); defer parsed.deinit(); @@ -836,7 +834,7 @@ test "json parse partial" { \\} ; const allocator = testing.allocator; - var scanner = JsonScanner.initCompleteInput(allocator, str); + var scanner = Scanner.initCompleteInput(allocator, str); defer scanner.deinit(); var arena = ArenaAllocator.init(allocator); @@ -886,8 +884,8 @@ test "json parse allocate when streaming" { var arena = ArenaAllocator.init(allocator); defer arena.deinit(); - var stream = std.io.fixedBufferStream(str); - var 
json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(str); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); const parsed = parseFromTokenSourceLeaky(T, arena.allocator(), &json_reader, .{}) catch |err| { json_reader.deinit(); diff --git a/lib/std/json/stringify.zig b/lib/std/json/stringify.zig deleted file mode 100644 index aa49573695..0000000000 --- a/lib/std/json/stringify.zig +++ /dev/null @@ -1,772 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; -const BitStack = std.BitStack; - -const OBJECT_MODE = 0; -const ARRAY_MODE = 1; - -pub const StringifyOptions = struct { - /// Controls the whitespace emitted. - /// The default `.minified` is a compact encoding with no whitespace between tokens. - /// Any setting other than `.minified` will use newlines, indentation, and a space after each ':'. - /// `.indent_1` means 1 space for each indentation level, `.indent_2` means 2 spaces, etc. - /// `.indent_tab` uses a tab for each indentation level. - whitespace: enum { - minified, - indent_1, - indent_2, - indent_3, - indent_4, - indent_8, - indent_tab, - } = .minified, - - /// Should optional fields with null value be written? - emit_null_optional_fields: bool = true, - - /// Arrays/slices of u8 are typically encoded as JSON strings. - /// This option emits them as arrays of numbers instead. - /// Does not affect calls to `objectField*()`. - emit_strings_as_arrays: bool = false, - - /// Should unicode characters be escaped in strings? - escape_unicode: bool = false, - - /// When true, renders numbers outside the range `+-1<<53` (the precise integer range of f64) as JSON strings in base 10. - emit_nonportable_numbers_as_strings: bool = false, -}; - -/// Writes the given value to the `std.io.GenericWriter` stream. -/// See `WriteStream` for how the given value is serialized into JSON. 
-/// The maximum nesting depth of the output JSON document is 256. -/// See also `stringifyMaxDepth` and `stringifyArbitraryDepth`. -pub fn stringify( - value: anytype, - options: StringifyOptions, - out_stream: anytype, -) @TypeOf(out_stream).Error!void { - var jw = writeStream(out_stream, options); - defer jw.deinit(); - try jw.write(value); -} - -/// Like `stringify` with configurable nesting depth. -/// `max_depth` is rounded up to the nearest multiple of 8. -/// Give `null` for `max_depth` to disable some safety checks and allow arbitrary nesting depth. -/// See `writeStreamMaxDepth` for more info. -pub fn stringifyMaxDepth( - value: anytype, - options: StringifyOptions, - out_stream: anytype, - comptime max_depth: ?usize, -) @TypeOf(out_stream).Error!void { - var jw = writeStreamMaxDepth(out_stream, options, max_depth); - try jw.write(value); -} - -/// Like `stringify` but takes an allocator to facilitate safety checks while allowing arbitrary nesting depth. -/// These safety checks can be helpful when debugging custom `jsonStringify` implementations; -/// See `WriteStream`. -pub fn stringifyArbitraryDepth( - allocator: Allocator, - value: anytype, - options: StringifyOptions, - out_stream: anytype, -) WriteStream(@TypeOf(out_stream), .checked_to_arbitrary_depth).Error!void { - var jw = writeStreamArbitraryDepth(allocator, out_stream, options); - defer jw.deinit(); - try jw.write(value); -} - -/// Calls `stringifyArbitraryDepth` and stores the result in dynamically allocated memory -/// instead of taking a `std.io.GenericWriter`. -/// -/// Caller owns returned memory. -pub fn stringifyAlloc( - allocator: Allocator, - value: anytype, - options: StringifyOptions, -) error{OutOfMemory}![]u8 { - var list = std.ArrayList(u8).init(allocator); - errdefer list.deinit(); - try stringifyArbitraryDepth(allocator, value, options, list.writer()); - return list.toOwnedSlice(); -} - -/// See `WriteStream` for documentation. 
-/// Equivalent to calling `writeStreamMaxDepth` with a depth of `256`. -/// -/// The caller does *not* need to call `deinit()` on the returned object. -pub fn writeStream( - out_stream: anytype, - options: StringifyOptions, -) WriteStream(@TypeOf(out_stream), .{ .checked_to_fixed_depth = 256 }) { - return writeStreamMaxDepth(out_stream, options, 256); -} - -/// See `WriteStream` for documentation. -/// The returned object includes 1 bit of size per `max_depth` to enable safety checks on the order of method calls; -/// see the grammar in the `WriteStream` documentation. -/// `max_depth` is rounded up to the nearest multiple of 8. -/// If the nesting depth exceeds `max_depth`, it is detectable illegal behavior. -/// Give `null` for `max_depth` to disable safety checks for the grammar and allow arbitrary nesting depth. -/// In `ReleaseFast` and `ReleaseSmall`, `max_depth` is ignored, effectively equivalent to passing `null`. -/// Alternatively, see `writeStreamArbitraryDepth` to do safety checks to arbitrary depth. -/// -/// The caller does *not* need to call `deinit()` on the returned object. -pub fn writeStreamMaxDepth( - out_stream: anytype, - options: StringifyOptions, - comptime max_depth: ?usize, -) WriteStream( - @TypeOf(out_stream), - if (max_depth) |d| .{ .checked_to_fixed_depth = d } else .assumed_correct, -) { - return WriteStream( - @TypeOf(out_stream), - if (max_depth) |d| .{ .checked_to_fixed_depth = d } else .assumed_correct, - ).init(undefined, out_stream, options); -} - -/// See `WriteStream` for documentation. -/// This version of the write stream enables safety checks to arbitrarily deep nesting levels -/// by using the given allocator. -/// The caller should call `deinit()` on the returned object to free allocated memory. -/// -/// In `ReleaseFast` and `ReleaseSmall` mode, this function is effectively equivalent to calling `writeStreamMaxDepth(..., null)`; -/// in those build modes, the allocator is *not used*. 
-pub fn writeStreamArbitraryDepth( - allocator: Allocator, - out_stream: anytype, - options: StringifyOptions, -) WriteStream(@TypeOf(out_stream), .checked_to_arbitrary_depth) { - return WriteStream(@TypeOf(out_stream), .checked_to_arbitrary_depth).init(allocator, out_stream, options); -} - -/// Writes JSON ([RFC8259](https://tools.ietf.org/html/rfc8259)) formatted data -/// to a stream. -/// -/// The sequence of method calls to write JSON content must follow this grammar: -/// ``` -/// <once> = <value> -/// <value> = -/// | <object> -/// | <array> -/// | write -/// | print -/// | <writeRawStream> -/// <object> = beginObject ( <field> <value> )* endObject -/// <field> = objectField | objectFieldRaw | <objectFieldRawStream> -/// <array> = beginArray ( <value> )* endArray -/// <writeRawStream> = beginWriteRaw ( stream.writeAll )* endWriteRaw -/// <objectFieldRawStream> = beginObjectFieldRaw ( stream.writeAll )* endObjectFieldRaw -/// ``` -/// -/// The `safety_checks_hint` parameter determines how much memory is used to enable assertions that the above grammar is being followed, -/// e.g. tripping an assertion rather than allowing `endObject` to emit the final `}` in `[[[]]}`. -/// "Depth" in this context means the depth of nested `[]` or `{}` expressions -/// (or equivalently the amount of recursion on the `<value>` grammar expression above). -/// For example, emitting the JSON `[[[]]]` requires a depth of 3. -/// If `.checked_to_fixed_depth` is used, there is additionally an assertion that the nesting depth never exceeds the given limit. -/// `.checked_to_arbitrary_depth` requires a runtime allocator for the memory. -/// `.checked_to_fixed_depth` embeds the storage required in the `WriteStream` struct. -/// `.assumed_correct` requires no space and performs none of these assertions. -/// In `ReleaseFast` and `ReleaseSmall` mode, the given `safety_checks_hint` is ignored and is always treated as `.assumed_correct`. -pub fn WriteStream( - comptime OutStream: type, - comptime safety_checks_hint: union(enum) { - checked_to_arbitrary_depth, - checked_to_fixed_depth: usize, // Rounded up to the nearest multiple of 8.
- assumed_correct, - }, -) type { - return struct { - const Self = @This(); - const build_mode_has_safety = switch (@import("builtin").mode) { - .Debug, .ReleaseSafe => true, - .ReleaseFast, .ReleaseSmall => false, - }; - const safety_checks: @TypeOf(safety_checks_hint) = if (build_mode_has_safety) - safety_checks_hint - else - .assumed_correct; - - pub const Stream = OutStream; - pub const Error = switch (safety_checks) { - .checked_to_arbitrary_depth => Stream.Error || error{OutOfMemory}, - .checked_to_fixed_depth, .assumed_correct => Stream.Error, - }; - - options: StringifyOptions, - - stream: OutStream, - indent_level: usize = 0, - next_punctuation: enum { - the_beginning, - none, - comma, - colon, - } = .the_beginning, - - nesting_stack: switch (safety_checks) { - .checked_to_arbitrary_depth => BitStack, - .checked_to_fixed_depth => |fixed_buffer_size| [(fixed_buffer_size + 7) >> 3]u8, - .assumed_correct => void, - }, - - raw_streaming_mode: if (build_mode_has_safety) - enum { none, value, objectField } - else - void = if (build_mode_has_safety) .none else {}, - - pub fn init(safety_allocator: Allocator, stream: OutStream, options: StringifyOptions) Self { - return .{ - .options = options, - .stream = stream, - .nesting_stack = switch (safety_checks) { - .checked_to_arbitrary_depth => BitStack.init(safety_allocator), - .checked_to_fixed_depth => |fixed_buffer_size| [_]u8{0} ** ((fixed_buffer_size + 7) >> 3), - .assumed_correct => {}, - }, - }; - } - - /// Only necessary with .checked_to_arbitrary_depth. 
- pub fn deinit(self: *Self) void { - switch (safety_checks) { - .checked_to_arbitrary_depth => self.nesting_stack.deinit(), - .checked_to_fixed_depth, .assumed_correct => {}, - } - self.* = undefined; - } - - pub fn beginArray(self: *Self) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - try self.valueStart(); - try self.stream.writeByte('['); - try self.pushIndentation(ARRAY_MODE); - self.next_punctuation = .none; - } - - pub fn beginObject(self: *Self) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - try self.valueStart(); - try self.stream.writeByte('{'); - try self.pushIndentation(OBJECT_MODE); - self.next_punctuation = .none; - } - - pub fn endArray(self: *Self) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - self.popIndentation(ARRAY_MODE); - switch (self.next_punctuation) { - .none => {}, - .comma => { - try self.indent(); - }, - .the_beginning, .colon => unreachable, - } - try self.stream.writeByte(']'); - self.valueDone(); - } - - pub fn endObject(self: *Self) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - self.popIndentation(OBJECT_MODE); - switch (self.next_punctuation) { - .none => {}, - .comma => { - try self.indent(); - }, - .the_beginning, .colon => unreachable, - } - try self.stream.writeByte('}'); - self.valueDone(); - } - - fn pushIndentation(self: *Self, mode: u1) !void { - switch (safety_checks) { - .checked_to_arbitrary_depth => { - try self.nesting_stack.push(mode); - self.indent_level += 1; - }, - .checked_to_fixed_depth => { - BitStack.pushWithStateAssumeCapacity(&self.nesting_stack, &self.indent_level, mode); - }, - .assumed_correct => { - self.indent_level += 1; - }, - } - } - fn popIndentation(self: *Self, assert_its_this_one: u1) void { - switch (safety_checks) { - .checked_to_arbitrary_depth => { - assert(self.nesting_stack.pop() == assert_its_this_one); - self.indent_level -= 1; - }, - 
.checked_to_fixed_depth => { - assert(BitStack.popWithState(&self.nesting_stack, &self.indent_level) == assert_its_this_one); - }, - .assumed_correct => { - self.indent_level -= 1; - }, - } - } - - fn indent(self: *Self) !void { - var char: u8 = ' '; - const n_chars = switch (self.options.whitespace) { - .minified => return, - .indent_1 => 1 * self.indent_level, - .indent_2 => 2 * self.indent_level, - .indent_3 => 3 * self.indent_level, - .indent_4 => 4 * self.indent_level, - .indent_8 => 8 * self.indent_level, - .indent_tab => blk: { - char = '\t'; - break :blk self.indent_level; - }, - }; - try self.stream.writeByte('\n'); - try self.stream.writeByteNTimes(char, n_chars); - } - - fn valueStart(self: *Self) !void { - if (self.isObjectKeyExpected()) |is_it| assert(!is_it); // Call objectField*(), not write(), for object keys. - return self.valueStartAssumeTypeOk(); - } - fn objectFieldStart(self: *Self) !void { - if (self.isObjectKeyExpected()) |is_it| assert(is_it); // Expected write(), not objectField*(). - return self.valueStartAssumeTypeOk(); - } - fn valueStartAssumeTypeOk(self: *Self) !void { - assert(!self.isComplete()); // JSON document already complete. - switch (self.next_punctuation) { - .the_beginning => { - // No indentation for the very beginning. - }, - .none => { - // First item in a container. - try self.indent(); - }, - .comma => { - // Subsequent item in a container. 
- try self.stream.writeByte(','); - try self.indent(); - }, - .colon => { - try self.stream.writeByte(':'); - if (self.options.whitespace != .minified) { - try self.stream.writeByte(' '); - } - }, - } - } - fn valueDone(self: *Self) void { - self.next_punctuation = .comma; - } - - // Only when safety is enabled: - fn isObjectKeyExpected(self: *const Self) ?bool { - switch (safety_checks) { - .checked_to_arbitrary_depth => return self.indent_level > 0 and - self.nesting_stack.peek() == OBJECT_MODE and - self.next_punctuation != .colon, - .checked_to_fixed_depth => return self.indent_level > 0 and - BitStack.peekWithState(&self.nesting_stack, self.indent_level) == OBJECT_MODE and - self.next_punctuation != .colon, - .assumed_correct => return null, - } - } - fn isComplete(self: *const Self) bool { - return self.indent_level == 0 and self.next_punctuation == .comma; - } - - /// An alternative to calling `write` that formats a value with `std.fmt`. - /// This function does the usual punctuation and indentation formatting - /// assuming the resulting formatted string represents a single complete value; - /// e.g. `"1"`, `"[]"`, `"[1,2]"`, not `"1,2"`. - /// This function may be useful for doing your own number formatting. - pub fn print(self: *Self, comptime fmt: []const u8, args: anytype) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - try self.valueStart(); - try self.stream.print(fmt, args); - self.valueDone(); - } - - /// An alternative to calling `write` that allows you to write directly to the `.stream` field, e.g. with `.stream.writeAll()`. - /// Call `beginWriteRaw()`, then write a complete value (including any quotes if necessary) directly to the `.stream` field, - /// then call `endWriteRaw()`. - /// This can be useful for streaming very long strings into the output without needing it all buffered in memory. 
- pub fn beginWriteRaw(self: *Self) !void { - if (build_mode_has_safety) { - assert(self.raw_streaming_mode == .none); - self.raw_streaming_mode = .value; - } - try self.valueStart(); - } - - /// See `beginWriteRaw`. - pub fn endWriteRaw(self: *Self) void { - if (build_mode_has_safety) { - assert(self.raw_streaming_mode == .value); - self.raw_streaming_mode = .none; - } - self.valueDone(); - } - - /// See `WriteStream` for when to call this method. - /// `key` is the string content of the property name. - /// Surrounding quotes will be added and any special characters will be escaped. - /// See also `objectFieldRaw`. - pub fn objectField(self: *Self, key: []const u8) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - try self.objectFieldStart(); - try encodeJsonString(key, self.options, self.stream); - self.next_punctuation = .colon; - } - /// See `WriteStream` for when to call this method. - /// `quoted_key` is the complete bytes of the key including quotes and any necessary escape sequences. - /// A few assertions are performed on the given value to ensure that the caller of this function understands the API contract. - /// See also `objectField`. - pub fn objectFieldRaw(self: *Self, quoted_key: []const u8) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - assert(quoted_key.len >= 2 and quoted_key[0] == '"' and quoted_key[quoted_key.len - 1] == '"'); // quoted_key should be "quoted". - try self.objectFieldStart(); - try self.stream.writeAll(quoted_key); - self.next_punctuation = .colon; - } - - /// In the rare case that you need to write very long object field names, - /// this is an alternative to `objectField` and `objectFieldRaw` that allows you to write directly to the `.stream` field - /// similar to `beginWriteRaw`. - /// Call `endObjectFieldRaw()` when you're done. 
- pub fn beginObjectFieldRaw(self: *Self) !void { - if (build_mode_has_safety) { - assert(self.raw_streaming_mode == .none); - self.raw_streaming_mode = .objectField; - } - try self.objectFieldStart(); - } - - /// See `beginObjectFieldRaw`. - pub fn endObjectFieldRaw(self: *Self) void { - if (build_mode_has_safety) { - assert(self.raw_streaming_mode == .objectField); - self.raw_streaming_mode = .none; - } - self.next_punctuation = .colon; - } - - /// Renders the given Zig value as JSON. - /// - /// Supported types: - /// * Zig `bool` -> JSON `true` or `false`. - /// * Zig `?T` -> `null` or the rendering of `T`. - /// * Zig `i32`, `u64`, etc. -> JSON number or string. - /// * When option `emit_nonportable_numbers_as_strings` is true, if the value is outside the range `+-1<<53` (the precise integer range of f64), it is rendered as a JSON string in base 10. Otherwise, it is rendered as JSON number. - /// * Zig floats -> JSON number or string. - /// * If the value cannot be precisely represented by an f64, it is rendered as a JSON string. Otherwise, it is rendered as JSON number. - /// * Zig `[]const u8`, `[]u8`, `*[N]u8`, `@Vector(N, u8)`, and similar -> JSON string. - /// * See `StringifyOptions.emit_strings_as_arrays`. - /// * If the content is not valid UTF-8, rendered as an array of numbers instead. - /// * Zig `[]T`, `[N]T`, `*[N]T`, `@Vector(N, T)`, and similar -> JSON array of the rendering of each item. - /// * Zig tuple -> JSON array of the rendering of each item. - /// * Zig `struct` -> JSON object with each field in declaration order. - /// * If the struct declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`. See `std.json.Value` for an example. - /// * See `StringifyOptions.emit_null_optional_fields`. 
- /// * Zig `union(enum)` -> JSON object with one field named for the active tag and a value representing the payload. - /// * If the payload is `void`, then the emitted value is `{}`. - /// * If the union declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`. - /// * Zig `enum` -> JSON string naming the active tag. - /// * If the enum declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`. - /// * If the enum is non-exhaustive, unnamed values are rendered as integers. - /// * Zig untyped enum literal -> JSON string naming the active tag. - /// * Zig error -> JSON string naming the error. - /// * Zig `*T` -> the rendering of `T`. Note there is no guard against circular-reference infinite recursion. - /// - /// See also alternative functions `print` and `beginWriteRaw`. - /// For writing object field names, use `objectField` instead. 
- pub fn write(self: *Self, value: anytype) Error!void { - if (build_mode_has_safety) assert(self.raw_streaming_mode == .none); - const T = @TypeOf(value); - switch (@typeInfo(T)) { - .int => { - try self.valueStart(); - if (self.options.emit_nonportable_numbers_as_strings and - (value <= -(1 << 53) or value >= (1 << 53))) - { - try self.stream.print("\"{}\"", .{value}); - } else { - try self.stream.print("{}", .{value}); - } - self.valueDone(); - return; - }, - .comptime_int => { - return self.write(@as(std.math.IntFittingRange(value, value), value)); - }, - .float, .comptime_float => { - if (@as(f64, @floatCast(value)) == value) { - try self.valueStart(); - try self.stream.print("{}", .{@as(f64, @floatCast(value))}); - self.valueDone(); - return; - } - try self.valueStart(); - try self.stream.print("\"{}\"", .{value}); - self.valueDone(); - return; - }, - - .bool => { - try self.valueStart(); - try self.stream.writeAll(if (value) "true" else "false"); - self.valueDone(); - return; - }, - .null => { - try self.valueStart(); - try self.stream.writeAll("null"); - self.valueDone(); - return; - }, - .optional => { - if (value) |payload| { - return try self.write(payload); - } else { - return try self.write(null); - } - }, - .@"enum" => |enum_info| { - if (std.meta.hasFn(T, "jsonStringify")) { - return value.jsonStringify(self); - } - - if (!enum_info.is_exhaustive) { - inline for (enum_info.fields) |field| { - if (value == @field(T, field.name)) { - break; - } - } else { - return self.write(@intFromEnum(value)); - } - } - - return self.stringValue(@tagName(value)); - }, - .enum_literal => { - return self.stringValue(@tagName(value)); - }, - .@"union" => { - if (std.meta.hasFn(T, "jsonStringify")) { - return value.jsonStringify(self); - } - - const info = @typeInfo(T).@"union"; - if (info.tag_type) |UnionTagType| { - try self.beginObject(); - inline for (info.fields) |u_field| { - if (value == @field(UnionTagType, u_field.name)) { - try self.objectField(u_field.name); 
- if (u_field.type == void) { - // void value is {} - try self.beginObject(); - try self.endObject(); - } else { - try self.write(@field(value, u_field.name)); - } - break; - } - } else { - unreachable; // No active tag? - } - try self.endObject(); - return; - } else { - @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'"); - } - }, - .@"struct" => |S| { - if (std.meta.hasFn(T, "jsonStringify")) { - return value.jsonStringify(self); - } - - if (S.is_tuple) { - try self.beginArray(); - } else { - try self.beginObject(); - } - inline for (S.fields) |Field| { - // don't include void fields - if (Field.type == void) continue; - - var emit_field = true; - - // don't include optional fields that are null when emit_null_optional_fields is set to false - if (@typeInfo(Field.type) == .optional) { - if (self.options.emit_null_optional_fields == false) { - if (@field(value, Field.name) == null) { - emit_field = false; - } - } - } - - if (emit_field) { - if (!S.is_tuple) { - try self.objectField(Field.name); - } - try self.write(@field(value, Field.name)); - } - } - if (S.is_tuple) { - try self.endArray(); - } else { - try self.endObject(); - } - return; - }, - .error_set => return self.stringValue(@errorName(value)), - .pointer => |ptr_info| switch (ptr_info.size) { - .one => switch (@typeInfo(ptr_info.child)) { - .array => { - // Coerce `*[N]T` to `[]const T`. - const Slice = []const std.meta.Elem(ptr_info.child); - return self.write(@as(Slice, value)); - }, - else => { - return self.write(value.*); - }, - }, - .many, .slice => { - if (ptr_info.size == .many and ptr_info.sentinel() == null) - @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel"); - const slice = if (ptr_info.size == .many) std.mem.span(value) else value; - - if (ptr_info.child == u8) { - // This is a []const u8, or some similar Zig string. 
- if (!self.options.emit_strings_as_arrays and std.unicode.utf8ValidateSlice(slice)) { - return self.stringValue(slice); - } - } - - try self.beginArray(); - for (slice) |x| { - try self.write(x); - } - try self.endArray(); - return; - }, - else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), - }, - .array => { - // Coerce `[N]T` to `*const [N]T` (and then to `[]const T`). - return self.write(&value); - }, - .vector => |info| { - const array: [info.len]info.child = value; - return self.write(&array); - }, - else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), - } - unreachable; - } - - fn stringValue(self: *Self, s: []const u8) !void { - try self.valueStart(); - try encodeJsonString(s, self.options, self.stream); - self.valueDone(); - } - }; -} - -fn outputUnicodeEscape(codepoint: u21, out_stream: anytype) !void { - if (codepoint <= 0xFFFF) { - // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF), - // then it may be represented as a six-character sequence: a reverse solidus, followed - // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point. - try out_stream.writeAll("\\u"); - //try w.printInt("x", .{ .width = 4, .fill = '0' }, codepoint); - try std.fmt.format(out_stream, "{x:0>4}", .{codepoint}); - } else { - assert(codepoint <= 0x10FFFF); - // To escape an extended character that is not in the Basic Multilingual Plane, - // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair. 
- const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800; - const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00; - try out_stream.writeAll("\\u"); - //try w.printInt("x", .{ .width = 4, .fill = '0' }, high); - try std.fmt.format(out_stream, "{x:0>4}", .{high}); - try out_stream.writeAll("\\u"); - //try w.printInt("x", .{ .width = 4, .fill = '0' }, low); - try std.fmt.format(out_stream, "{x:0>4}", .{low}); - } -} - -fn outputSpecialEscape(c: u8, writer: anytype) !void { - switch (c) { - '\\' => try writer.writeAll("\\\\"), - '\"' => try writer.writeAll("\\\""), - 0x08 => try writer.writeAll("\\b"), - 0x0C => try writer.writeAll("\\f"), - '\n' => try writer.writeAll("\\n"), - '\r' => try writer.writeAll("\\r"), - '\t' => try writer.writeAll("\\t"), - else => try outputUnicodeEscape(c, writer), - } -} - -/// Write `string` to `writer` as a JSON encoded string. -pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void { - try writer.writeByte('\"'); - try encodeJsonStringChars(string, options, writer); - try writer.writeByte('\"'); -} - -/// Write `chars` to `writer` as JSON encoded string characters. -pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void { - var write_cursor: usize = 0; - var i: usize = 0; - if (options.escape_unicode) { - while (i < chars.len) : (i += 1) { - switch (chars[i]) { - // normal ascii character - 0x20...0x21, 0x23...0x5B, 0x5D...0x7E => {}, - 0x00...0x1F, '\\', '\"' => { - // Always must escape these. 
- try writer.writeAll(chars[write_cursor..i]); - try outputSpecialEscape(chars[i], writer); - write_cursor = i + 1; - }, - 0x7F...0xFF => { - try writer.writeAll(chars[write_cursor..i]); - const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable; - const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable; - try outputUnicodeEscape(codepoint, writer); - i += ulen - 1; - write_cursor = i + 1; - }, - } - } - } else { - while (i < chars.len) : (i += 1) { - switch (chars[i]) { - // normal bytes - 0x20...0x21, 0x23...0x5B, 0x5D...0xFF => {}, - 0x00...0x1F, '\\', '\"' => { - // Always must escape these. - try writer.writeAll(chars[write_cursor..i]); - try outputSpecialEscape(chars[i], writer); - write_cursor = i + 1; - }, - } - } - } - try writer.writeAll(chars[write_cursor..chars.len]); -} - -test { - _ = @import("./stringify_test.zig"); -} diff --git a/lib/std/json/stringify_test.zig b/lib/std/json/stringify_test.zig deleted file mode 100644 index 22dd504285..0000000000 --- a/lib/std/json/stringify_test.zig +++ /dev/null @@ -1,504 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const testing = std.testing; - -const ObjectMap = @import("dynamic.zig").ObjectMap; -const Value = @import("dynamic.zig").Value; - -const StringifyOptions = @import("stringify.zig").StringifyOptions; -const stringify = @import("stringify.zig").stringify; -const stringifyMaxDepth = @import("stringify.zig").stringifyMaxDepth; -const stringifyArbitraryDepth = @import("stringify.zig").stringifyArbitraryDepth; -const stringifyAlloc = @import("stringify.zig").stringifyAlloc; -const writeStream = @import("stringify.zig").writeStream; -const writeStreamMaxDepth = @import("stringify.zig").writeStreamMaxDepth; -const writeStreamArbitraryDepth = @import("stringify.zig").writeStreamArbitraryDepth; - -test "json write stream" { - var out_buf: [1024]u8 = undefined; - var slice_stream = std.io.fixedBufferStream(&out_buf); - const out = slice_stream.writer(); 
- - { - var w = writeStream(out, .{ .whitespace = .indent_2 }); - try testBasicWriteStream(&w, &slice_stream); - } - - { - var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, 8); - try testBasicWriteStream(&w, &slice_stream); - } - - { - var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, null); - try testBasicWriteStream(&w, &slice_stream); - } - - { - var w = writeStreamArbitraryDepth(testing.allocator, out, .{ .whitespace = .indent_2 }); - defer w.deinit(); - try testBasicWriteStream(&w, &slice_stream); - } -} - -fn testBasicWriteStream(w: anytype, slice_stream: anytype) !void { - slice_stream.reset(); - - try w.beginObject(); - - try w.objectField("object"); - var arena_allocator = std.heap.ArenaAllocator.init(testing.allocator); - defer arena_allocator.deinit(); - try w.write(try getJsonObject(arena_allocator.allocator())); - - try w.objectFieldRaw("\"string\""); - try w.write("This is a string"); - - try w.objectField("array"); - try w.beginArray(); - try w.write("Another string"); - try w.write(@as(i32, 1)); - try w.write(@as(f32, 3.5)); - try w.endArray(); - - try w.objectField("int"); - try w.write(@as(i32, 10)); - - try w.objectField("float"); - try w.write(@as(f32, 3.5)); - - try w.endObject(); - - const result = slice_stream.getWritten(); - const expected = - \\{ - \\ "object": { - \\ "one": 1, - \\ "two": 2 - \\ }, - \\ "string": "This is a string", - \\ "array": [ - \\ "Another string", - \\ 1, - \\ 3.5 - \\ ], - \\ "int": 10, - \\ "float": 3.5 - \\} - ; - try std.testing.expectEqualStrings(expected, result); -} - -fn getJsonObject(allocator: std.mem.Allocator) !Value { - var value = Value{ .object = ObjectMap.init(allocator) }; - try value.object.put("one", Value{ .integer = @as(i64, @intCast(1)) }); - try value.object.put("two", Value{ .float = 2.0 }); - return value; -} - -test "stringify null optional fields" { - const MyStruct = struct { - optional: ?[]const u8 = null, - required: []const u8 = "something", - 
another_optional: ?[]const u8 = null, - another_required: []const u8 = "something else", - }; - try testStringify( - \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"} - , - MyStruct{}, - .{}, - ); - try testStringify( - \\{"required":"something","another_required":"something else"} - , - MyStruct{}, - .{ .emit_null_optional_fields = false }, - ); -} - -test "stringify basic types" { - try testStringify("false", false, .{}); - try testStringify("true", true, .{}); - try testStringify("null", @as(?u8, null), .{}); - try testStringify("null", @as(?*u32, null), .{}); - try testStringify("42", 42, .{}); - try testStringify("42", 42.0, .{}); - try testStringify("42", @as(u8, 42), .{}); - try testStringify("42", @as(u128, 42), .{}); - try testStringify("9999999999999999", 9999999999999999, .{}); - try testStringify("42", @as(f32, 42), .{}); - try testStringify("42", @as(f64, 42), .{}); - try testStringify("\"ItBroke\"", @as(anyerror, error.ItBroke), .{}); - try testStringify("\"ItBroke\"", error.ItBroke, .{}); -} - -test "stringify string" { - try testStringify("\"hello\"", "hello", .{}); - try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{}); - try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{ .escape_unicode = true }); - try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{}); - try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{ .escape_unicode = true }); - try testStringify("\"with unicode\u{80}\"", "with unicode\u{80}", .{}); - try testStringify("\"with unicode\\u0080\"", "with unicode\u{80}", .{ .escape_unicode = true }); - try testStringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", .{}); - try testStringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", .{ .escape_unicode = true }); - try testStringify("\"with unicode\u{100}\"", "with unicode\u{100}", .{}); - try testStringify("\"with unicode\\u0100\"", "with unicode\u{100}", .{ .escape_unicode 
= true }); - try testStringify("\"with unicode\u{800}\"", "with unicode\u{800}", .{}); - try testStringify("\"with unicode\\u0800\"", "with unicode\u{800}", .{ .escape_unicode = true }); - try testStringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", .{}); - try testStringify("\"with unicode\\u8000\"", "with unicode\u{8000}", .{ .escape_unicode = true }); - try testStringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", .{}); - try testStringify("\"with unicode\\ud799\"", "with unicode\u{D799}", .{ .escape_unicode = true }); - try testStringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", .{}); - try testStringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", .{ .escape_unicode = true }); - try testStringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", .{}); - try testStringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", .{ .escape_unicode = true }); -} - -test "stringify many-item sentinel-terminated string" { - try testStringify("\"hello\"", @as([*:0]const u8, "hello"), .{}); - try testStringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), .{ .escape_unicode = true }); - try testStringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), .{ .escape_unicode = true }); -} - -test "stringify enums" { - const E = enum { - foo, - bar, - }; - try testStringify("\"foo\"", E.foo, .{}); - try testStringify("\"bar\"", E.bar, .{}); -} - -test "stringify non-exhaustive enum" { - const E = enum(u8) { - foo = 0, - _, - }; - try testStringify("\"foo\"", E.foo, .{}); - try testStringify("1", @as(E, @enumFromInt(1)), .{}); -} - -test "stringify enum literals" { - try testStringify("\"foo\"", .foo, .{}); - try testStringify("\"bar\"", .bar, .{}); -} - -test "stringify tagged unions" { - const T = union(enum) { - nothing, - foo: u32, - bar: bool, - }; - try testStringify("{\"nothing\":{}}", T{ .nothing = {} }, .{}); - try testStringify("{\"foo\":42}", T{ .foo = 42 }, .{}); - try 
testStringify("{\"bar\":true}", T{ .bar = true }, .{}); -} - -test "stringify struct" { - try testStringify("{\"foo\":42}", struct { - foo: u32, - }{ .foo = 42 }, .{}); -} - -test "emit_strings_as_arrays" { - // Should only affect string values, not object keys. - try testStringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, .{}); - try testStringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, .{ .emit_strings_as_arrays = true }); - // Should *not* affect these types: - try testStringify("\"foo\"", @as(enum { foo, bar }, .foo), .{ .emit_strings_as_arrays = true }); - try testStringify("\"ItBroke\"", error.ItBroke, .{ .emit_strings_as_arrays = true }); - // Should work on these: - try testStringify("\"bar\"", @Vector(3, u8){ 'b', 'a', 'r' }, .{}); - try testStringify("[98,97,114]", @Vector(3, u8){ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true }); - try testStringify("\"bar\"", [3]u8{ 'b', 'a', 'r' }, .{}); - try testStringify("[98,97,114]", [3]u8{ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true }); -} - -test "stringify struct with indentation" { - try testStringify( - \\{ - \\ "foo": 42, - \\ "bar": [ - \\ 1, - \\ 2, - \\ 3 - \\ ] - \\} - , - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - .{ .whitespace = .indent_4 }, - ); - try testStringify( - "{\n\t\"foo\": 42,\n\t\"bar\": [\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}", - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - .{ .whitespace = .indent_tab }, - ); - try testStringify( - \\{"foo":42,"bar":[1,2,3]} - , - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - .{ .whitespace = .minified }, - ); -} - -test "stringify struct with void field" { - try testStringify("{\"foo\":42}", struct { - foo: u32, - bar: void = {}, - }{ .foo = 42 }, .{}); -} - -test "stringify array of structs" { - const MyStruct = struct { - foo: u32, - }; - try testStringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{ - MyStruct{ 
.foo = 42 }, - MyStruct{ .foo = 100 }, - MyStruct{ .foo = 1000 }, - }, .{}); -} - -test "stringify struct with custom stringifier" { - try testStringify("[\"something special\",42]", struct { - foo: u32, - const Self = @This(); - pub fn jsonStringify(value: @This(), jws: anytype) !void { - _ = value; - try jws.beginArray(); - try jws.write("something special"); - try jws.write(42); - try jws.endArray(); - } - }{ .foo = 42 }, .{}); -} - -test "stringify vector" { - try testStringify("[1,1]", @as(@Vector(2, u32), @splat(1)), .{}); - try testStringify("\"AA\"", @as(@Vector(2, u8), @splat('A')), .{}); - try testStringify("[65,65]", @as(@Vector(2, u8), @splat('A')), .{ .emit_strings_as_arrays = true }); -} - -test "stringify tuple" { - try testStringify("[\"foo\",42]", std.meta.Tuple(&.{ []const u8, usize }){ "foo", 42 }, .{}); -} - -fn testStringify(expected: []const u8, value: anytype, options: StringifyOptions) !void { - const ValidationWriter = struct { - const Self = @This(); - pub const Writer = std.io.GenericWriter(*Self, Error, write); - pub const Error = error{ - TooMuchData, - DifferentData, - }; - - expected_remaining: []const u8, - - fn init(exp: []const u8) Self { - return .{ .expected_remaining = exp }; - } - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - fn write(self: *Self, bytes: []const u8) Error!usize { - if (self.expected_remaining.len < bytes.len) { - std.debug.print( - \\====== expected this output: ========= - \\{s} - \\======== instead found this: ========= - \\{s} - \\====================================== - , .{ - self.expected_remaining, - bytes, - }); - return error.TooMuchData; - } - if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) { - std.debug.print( - \\====== expected this output: ========= - \\{s} - \\======== instead found this: ========= - \\{s} - \\====================================== - , .{ - self.expected_remaining[0..bytes.len], - bytes, - }); - return error.DifferentData; - } - 
self.expected_remaining = self.expected_remaining[bytes.len..]; - return bytes.len; - } - }; - - var vos = ValidationWriter.init(expected); - try stringifyArbitraryDepth(testing.allocator, value, options, vos.writer()); - if (vos.expected_remaining.len > 0) return error.NotEnoughData; - - // Also test with safety disabled. - try testStringifyMaxDepth(expected, value, options, null); - try testStringifyArbitraryDepth(expected, value, options); -} - -fn testStringifyMaxDepth(expected: []const u8, value: anytype, options: StringifyOptions, comptime max_depth: ?usize) !void { - var out_buf: [1024]u8 = undefined; - var slice_stream = std.io.fixedBufferStream(&out_buf); - const out = slice_stream.writer(); - - try stringifyMaxDepth(value, options, out, max_depth); - const got = slice_stream.getWritten(); - - try testing.expectEqualStrings(expected, got); -} - -fn testStringifyArbitraryDepth(expected: []const u8, value: anytype, options: StringifyOptions) !void { - var out_buf: [1024]u8 = undefined; - var slice_stream = std.io.fixedBufferStream(&out_buf); - const out = slice_stream.writer(); - - try stringifyArbitraryDepth(testing.allocator, value, options, out); - const got = slice_stream.getWritten(); - - try testing.expectEqualStrings(expected, got); -} - -test "stringify alloc" { - const allocator = std.testing.allocator; - const expected = - \\{"foo":"bar","answer":42,"my_friend":"sammy"} - ; - const actual = try stringifyAlloc(allocator, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{}); - defer allocator.free(actual); - - try std.testing.expectEqualStrings(expected, actual); -} - -test "comptime stringify" { - comptime testStringifyMaxDepth("false", false, .{}, null) catch unreachable; - comptime testStringifyMaxDepth("false", false, .{}, 0) catch unreachable; - comptime testStringifyArbitraryDepth("false", false, .{}) catch unreachable; - - const MyStruct = struct { - foo: u32, - }; - comptime 
testStringifyMaxDepth("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{ - MyStruct{ .foo = 42 }, - MyStruct{ .foo = 100 }, - MyStruct{ .foo = 1000 }, - }, .{}, null) catch unreachable; - comptime testStringifyMaxDepth("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{ - MyStruct{ .foo = 42 }, - MyStruct{ .foo = 100 }, - MyStruct{ .foo = 1000 }, - }, .{}, 8) catch unreachable; -} - -test "print" { - var out_buf: [1024]u8 = undefined; - var slice_stream = std.io.fixedBufferStream(&out_buf); - const out = slice_stream.writer(); - - var w = writeStream(out, .{ .whitespace = .indent_2 }); - defer w.deinit(); - - try w.beginObject(); - try w.objectField("a"); - try w.print("[ ]", .{}); - try w.objectField("b"); - try w.beginArray(); - try w.print("[{s}] ", .{"[]"}); - try w.print(" {}", .{12345}); - try w.endArray(); - try w.endObject(); - - const result = slice_stream.getWritten(); - const expected = - \\{ - \\ "a": [ ], - \\ "b": [ - \\ [[]] , - \\ 12345 - \\ ] - \\} - ; - try std.testing.expectEqualStrings(expected, result); -} - -test "nonportable numbers" { - try testStringify("9999999999999999", 9999999999999999, .{}); - try testStringify("\"9999999999999999\"", 9999999999999999, .{ .emit_nonportable_numbers_as_strings = true }); -} - -test "stringify raw streaming" { - var out_buf: [1024]u8 = undefined; - var slice_stream = std.io.fixedBufferStream(&out_buf); - const out = slice_stream.writer(); - - { - var w = writeStream(out, .{ .whitespace = .indent_2 }); - try testRawStreaming(&w, &slice_stream); - } - - { - var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, 8); - try testRawStreaming(&w, &slice_stream); - } - - { - var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, null); - try testRawStreaming(&w, &slice_stream); - } - - { - var w = writeStreamArbitraryDepth(testing.allocator, out, .{ .whitespace = .indent_2 }); - defer w.deinit(); - try testRawStreaming(&w, &slice_stream); - } -} - -fn testRawStreaming(w: anytype, 
slice_stream: anytype) !void { - slice_stream.reset(); - - try w.beginObject(); - try w.beginObjectFieldRaw(); - try w.stream.writeAll("\"long"); - try w.stream.writeAll(" key\""); - w.endObjectFieldRaw(); - try w.beginWriteRaw(); - try w.stream.writeAll("\"long"); - try w.stream.writeAll(" value\""); - w.endWriteRaw(); - try w.endObject(); - - const result = slice_stream.getWritten(); - const expected = - \\{ - \\ "long key": "long value" - \\} - ; - try std.testing.expectEqualStrings(expected, result); -} diff --git a/lib/std/json/test.zig b/lib/std/json/test.zig index 136e8e34d1..d3d803e939 100644 --- a/lib/std/json/test.zig +++ b/lib/std/json/test.zig @@ -1,10 +1,9 @@ const std = @import("std"); +const json = std.json; const testing = std.testing; const parseFromSlice = @import("./static.zig").parseFromSlice; -const validate = @import("./scanner.zig").validate; -const JsonScanner = @import("./scanner.zig").Scanner; +const Scanner = @import("./Scanner.zig"); const Value = @import("./dynamic.zig").Value; -const stringifyAlloc = @import("./stringify.zig").stringifyAlloc; // Support for JSONTestSuite.zig pub fn ok(s: []const u8) !void { @@ -20,7 +19,7 @@ pub fn any(s: []const u8) !void { testHighLevelDynamicParser(s) catch {}; } fn testLowLevelScanner(s: []const u8) !void { - var scanner = JsonScanner.initCompleteInput(testing.allocator, s); + var scanner = Scanner.initCompleteInput(testing.allocator, s); defer scanner.deinit(); while (true) { const token = try scanner.next(); @@ -47,12 +46,12 @@ test "n_object_closed_missing_value" { } fn roundTrip(s: []const u8) !void { - try testing.expect(try validate(testing.allocator, s)); + try testing.expect(try Scanner.validate(testing.allocator, s)); var parsed = try parseFromSlice(Value, testing.allocator, s, .{}); defer parsed.deinit(); - const rendered = try stringifyAlloc(testing.allocator, parsed.value, .{}); + const rendered = try json.Stringify.valueAlloc(testing.allocator, parsed.value, .{}); defer 
testing.allocator.free(rendered); try testing.expectEqualStrings(s, rendered);