diff --git a/doc/langref.html.in b/doc/langref.html.in
index dcf13e812d..e8189e5c42 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -7987,7 +7987,7 @@ AsmInput <- COLON AsmInputList AsmClobbers?
AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN
-AsmClobbers <- COLON StringList
+AsmClobbers <- COLON Expr
# *** Helper grammar ***
BreakLabel <- COLON IDENTIFIER
diff --git a/lib/compiler/resinator/main.zig b/lib/compiler/resinator/main.zig
index 4c952c03c4..30e9c825bb 100644
--- a/lib/compiler/resinator/main.zig
+++ b/lib/compiler/resinator/main.zig
@@ -292,12 +292,14 @@ pub fn main() !void {
};
defer depfile.close();
- const depfile_writer = depfile.deprecatedWriter();
- var depfile_buffered_writer = std.io.bufferedWriter(depfile_writer);
+ var depfile_buffer: [1024]u8 = undefined;
+ var depfile_writer = depfile.writer(&depfile_buffer);
switch (options.depfile_fmt) {
.json => {
- var write_stream = std.json.writeStream(depfile_buffered_writer.writer(), .{ .whitespace = .indent_2 });
- defer write_stream.deinit();
+ var write_stream: std.json.Stringify = .{
+ .writer = &depfile_writer.interface,
+ .options = .{ .whitespace = .indent_2 },
+ };
try write_stream.beginArray();
for (dependencies_list.items) |dep_path| {
@@ -306,7 +308,7 @@ pub fn main() !void {
try write_stream.endArray();
},
}
- try depfile_buffered_writer.flush();
+ try depfile_writer.interface.flush();
}
}
diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig
index a69066f09c..8b60a75399 100644
--- a/lib/compiler/test_runner.zig
+++ b/lib/compiler/test_runner.zig
@@ -10,10 +10,10 @@ pub const std_options: std.Options = .{
};
var log_err_count: usize = 0;
-var fba_buffer: [8192]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&fba_buffer);
-var stdin_buffer: [std.heap.page_size_min]u8 align(std.heap.page_size_min) = undefined;
-var stdout_buffer: [std.heap.page_size_min]u8 align(std.heap.page_size_min) = undefined;
+var fba_buffer: [8192]u8 = undefined;
+var stdin_buffer: [4096]u8 = undefined;
+var stdout_buffer: [4096]u8 = undefined;
const crippled = switch (builtin.zig_backend) {
.stage2_powerpc,
@@ -68,8 +68,8 @@ pub fn main() void {
fn mainServer() !void {
@disableInstrumentation();
- var stdin_reader = std.fs.File.stdin().reader(&stdin_buffer);
- var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
+ var stdin_reader = std.fs.File.stdin().readerStreaming(&stdin_buffer);
+ var stdout_writer = std.fs.File.stdout().writerStreaming(&stdout_buffer);
var server = try std.zig.Server.init(.{
.in = &stdin_reader.interface,
.out = &stdout_writer.interface,
@@ -104,7 +104,7 @@ fn mainServer() !void {
defer testing.allocator.free(expected_panic_msgs);
for (test_fns, names, expected_panic_msgs) |test_fn, *name, *expected_panic_msg| {
- name.* = @as(u32, @intCast(string_bytes.items.len));
+ name.* = @intCast(string_bytes.items.len);
try string_bytes.ensureUnusedCapacity(testing.allocator, test_fn.name.len + 1);
string_bytes.appendSliceAssumeCapacity(test_fn.name);
string_bytes.appendAssumeCapacity(0);
diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig
index a0a58067fc..efd0f86105 100644
--- a/lib/std/Build/Cache/Path.zig
+++ b/lib/std/Build/Cache/Path.zig
@@ -161,17 +161,19 @@ pub fn formatEscapeString(path: Path, writer: *std.io.Writer) std.io.Writer.Erro
}
}
+/// Deprecated, use double quoted escape to print paths.
pub fn fmtEscapeChar(path: Path) std.fmt.Formatter(Path, formatEscapeChar) {
return .{ .data = path };
}
+/// Deprecated, use double quoted escape to print paths.
pub fn formatEscapeChar(path: Path, writer: *std.io.Writer) std.io.Writer.Error!void {
if (path.root_dir.path) |p| {
- try std.zig.charEscape(p, writer);
- if (path.sub_path.len > 0) try std.zig.charEscape(fs.path.sep_str, writer);
+ for (p) |byte| try std.zig.charEscape(byte, writer);
+ if (path.sub_path.len > 0) try std.zig.charEscape(fs.path.sep, writer); // escape the separator too; it is `\` on Windows
}
if (path.sub_path.len > 0) {
- try std.zig.charEscape(path.sub_path, writer);
+ for (path.sub_path) |byte| try std.zig.charEscape(byte, writer);
}
}
diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig
index ceabed0cb4..68ebe0f8da 100644
--- a/lib/std/Build/Step/Run.zig
+++ b/lib/std/Build/Step/Run.zig
@@ -1122,10 +1122,12 @@ fn runCommand(
// Wine's excessive stderr logging is only situationally helpful. Disable it by default, but
// allow the user to override it (e.g. with `WINEDEBUG=err+all`) if desired.
if (env_map.get("WINEDEBUG") == null) {
- // We don't own `env_map` at this point, so turn it into a copy before modifying it.
- env_map = arena.create(EnvMap) catch @panic("OOM");
- env_map.hash_map = try env_map.hash_map.cloneWithAllocator(arena);
- try env_map.put("WINEDEBUG", "-all");
+ // We don't own `env_map` at this point, so create a copy in order to modify it.
+ const new_env_map = arena.create(EnvMap) catch @panic("OOM");
+ new_env_map.hash_map = try env_map.hash_map.cloneWithAllocator(arena);
+ try new_env_map.put("WINEDEBUG", "-all");
+
+ env_map = new_env_map;
}
} else {
return failForeign(run, "-fwine", argv[0], exe);
@@ -1737,7 +1739,7 @@ fn sendMessage(file: std.fs.File, tag: std.zig.Client.Message.Tag) !void {
.tag = tag,
.bytes_len = 0,
};
- try file.writeAll(std.mem.asBytes(&header));
+ try file.writeAll(@ptrCast(&header));
}
fn sendRunTestMessage(file: std.fs.File, tag: std.zig.Client.Message.Tag, index: u32) !void {
diff --git a/lib/std/Io/Reader.zig b/lib/std/Io/Reader.zig
index f2a1ec7287..f25e113522 100644
--- a/lib/std/Io/Reader.zig
+++ b/lib/std/Io/Reader.zig
@@ -990,9 +990,9 @@ pub fn discardDelimiterLimit(r: *Reader, delimiter: u8, limit: Limit) DiscardDel
/// Returns `error.EndOfStream` if and only if there are fewer than `n` bytes
/// remaining.
///
-/// Asserts buffer capacity is at least `n`.
+/// If the end of stream is not encountered, asserts buffer capacity is at
+/// least `n`.
pub fn fill(r: *Reader, n: usize) Error!void {
- assert(n <= r.buffer.len);
if (r.seek + n <= r.end) {
@branchHint(.likely);
return;
@@ -1108,9 +1108,9 @@ pub fn takeVarInt(r: *Reader, comptime Int: type, endian: std.builtin.Endian, n:
/// Asserts the buffer was initialized with a capacity at least `@sizeOf(T)`.
///
/// See also:
-/// * `peekStructReference`
+/// * `peekStructPointer`
/// * `takeStruct`
-pub fn takeStructReference(r: *Reader, comptime T: type) Error!*align(1) T {
+pub fn takeStructPointer(r: *Reader, comptime T: type) Error!*align(1) T {
// Only extern and packed structs have defined in-memory layout.
comptime assert(@typeInfo(T).@"struct".layout != .auto);
return @ptrCast(try r.takeArray(@sizeOf(T)));
@@ -1122,9 +1122,9 @@ pub fn takeStructReference(r: *Reader, comptime T: type) Error!*align(1) T {
/// Asserts the buffer was initialized with a capacity at least `@sizeOf(T)`.
///
/// See also:
-/// * `takeStructReference`
+/// * `takeStructPointer`
/// * `peekStruct`
-pub fn peekStructReference(r: *Reader, comptime T: type) Error!*align(1) T {
+pub fn peekStructPointer(r: *Reader, comptime T: type) Error!*align(1) T {
// Only extern and packed structs have defined in-memory layout.
comptime assert(@typeInfo(T).@"struct".layout != .auto);
return @ptrCast(try r.peekArray(@sizeOf(T)));
@@ -1136,19 +1136,19 @@ pub fn peekStructReference(r: *Reader, comptime T: type) Error!*align(1) T {
/// when `endian` is comptime-known and matches the host endianness.
///
/// See also:
-/// * `takeStructReference`
+/// * `takeStructPointer`
/// * `peekStruct`
pub inline fn takeStruct(r: *Reader, comptime T: type, endian: std.builtin.Endian) Error!T {
switch (@typeInfo(T)) {
.@"struct" => |info| switch (info.layout) {
.auto => @compileError("ill-defined memory layout"),
.@"extern" => {
- var res = (try r.takeStructReference(T)).*;
+ var res = (try r.takeStructPointer(T)).*;
if (native_endian != endian) std.mem.byteSwapAllFields(T, &res);
return res;
},
.@"packed" => {
- return takeInt(r, info.backing_integer.?, endian);
+ return @bitCast(try takeInt(r, info.backing_integer.?, endian));
},
},
else => @compileError("not a struct"),
@@ -1162,18 +1162,18 @@ pub inline fn takeStruct(r: *Reader, comptime T: type, endian: std.builtin.Endia
///
/// See also:
/// * `takeStruct`
-/// * `peekStructReference`
+/// * `peekStructPointer`
pub inline fn peekStruct(r: *Reader, comptime T: type, endian: std.builtin.Endian) Error!T {
switch (@typeInfo(T)) {
.@"struct" => |info| switch (info.layout) {
.auto => @compileError("ill-defined memory layout"),
.@"extern" => {
- var res = (try r.peekStructReference(T)).*;
+ var res = (try r.peekStructPointer(T)).*;
if (native_endian != endian) std.mem.byteSwapAllFields(T, &res);
return res;
},
.@"packed" => {
- return peekInt(r, info.backing_integer.?, endian);
+ return @bitCast(try peekInt(r, info.backing_integer.?, endian));
},
},
else => @compileError("not a struct"),
@@ -1557,27 +1557,27 @@ test takeVarInt {
try testing.expectError(error.EndOfStream, r.takeVarInt(u16, .little, 1));
}
-test takeStructReference {
+test takeStructPointer {
var r: Reader = .fixed(&.{ 0x12, 0x00, 0x34, 0x56 });
const S = extern struct { a: u8, b: u16 };
switch (native_endian) {
- .little => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.takeStructReference(S)).*),
- .big => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.takeStructReference(S)).*),
+ .little => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.takeStructPointer(S)).*),
+ .big => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.takeStructPointer(S)).*),
}
- try testing.expectError(error.EndOfStream, r.takeStructReference(S));
+ try testing.expectError(error.EndOfStream, r.takeStructPointer(S));
}
-test peekStructReference {
+test peekStructPointer {
var r: Reader = .fixed(&.{ 0x12, 0x00, 0x34, 0x56 });
const S = extern struct { a: u8, b: u16 };
switch (native_endian) {
.little => {
- try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructReference(S)).*);
- try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructReference(S)).*);
+ try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructPointer(S)).*);
+ try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructPointer(S)).*);
},
.big => {
- try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructReference(S)).*);
- try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructReference(S)).*);
+ try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructPointer(S)).*);
+ try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructPointer(S)).*);
},
}
}
@@ -1724,6 +1724,27 @@ test "takeDelimiterInclusive when it rebases" {
}
}
+test "takeStruct and peekStruct packed" {
+ var r: Reader = .fixed(&.{ 0b11110000, 0b00110011 }); // read as a big-endian u16 below: 0b11110000_00110011
+ const S = packed struct(u16) { a: u2, b: u6, c: u7, d: u1 }; // `a` occupies the least significant bits, `d` the most significant
+
+ try testing.expectEqual(@as(S, .{
+ .a = 0b11,
+ .b = 0b001100,
+ .c = 0b1110000,
+ .d = 0b1,
+ }), try r.peekStruct(S, .big)); // the same value is expected again from the take below
+
+ try testing.expectEqual(@as(S, .{
+ .a = 0b11,
+ .b = 0b001100,
+ .c = 0b1110000,
+ .d = 0b1,
+ }), try r.takeStruct(S, .big));
+
+ try testing.expectError(error.EndOfStream, r.takeStruct(S, .little)); // both bytes were consumed by the take above
+}
+
/// Provides a `Reader` implementation by passing data from an underlying
/// reader through `Hasher.update`.
///
diff --git a/lib/std/Io/Writer.zig b/lib/std/Io/Writer.zig
index 11bc05a00d..54d113ed91 100644
--- a/lib/std/Io/Writer.zig
+++ b/lib/std/Io/Writer.zig
@@ -867,18 +867,11 @@ pub inline fn writeSliceEndian(
}
}
-/// Asserts that the buffer storage capacity is at least enough to store `@sizeOf(Elem)`
-///
-/// Asserts that the buffer is aligned enough for `@alignOf(Elem)`.
pub fn writeSliceSwap(w: *Writer, Elem: type, slice: []const Elem) Error!void {
- var i: usize = 0;
- while (i < slice.len) {
- const dest_bytes = try w.writableSliceGreedy(@sizeOf(Elem));
- const dest: []Elem = @alignCast(@ptrCast(dest_bytes[0 .. dest_bytes.len - dest_bytes.len % @sizeOf(Elem)]));
- const copy_len = @min(dest.len, slice.len - i);
- @memcpy(dest[0..copy_len], slice[i..][0..copy_len]);
- i += copy_len;
- std.mem.byteSwapAllElements(Elem, dest);
+ for (slice) |elem| { // one element at a time through `writeAll`: no buffer capacity or alignment requirement
+ var tmp = elem; // swap a local copy; `slice` is const and is left untouched
+ std.mem.byteSwapAllFields(Elem, &tmp);
+ try w.writeAll(@ptrCast(&tmp)); // emit the swapped element as raw bytes
}
}
@@ -1141,8 +1134,8 @@ pub fn printValue(
else => invalidFmtError(fmt, value),
},
't' => switch (@typeInfo(T)) {
- .error_set => return w.writeAll(@errorName(value)),
- .@"enum", .@"union" => return w.writeAll(@tagName(value)),
+ .error_set => return w.alignBufferOptions(@errorName(value), options),
+ .@"enum", .@"union" => return w.alignBufferOptions(@tagName(value), options),
else => invalidFmtError(fmt, value),
},
else => {},
@@ -2152,6 +2145,14 @@ test "bytes.hex" {
try testing.expectFmt("lowercase: 000ebabe\n", "lowercase: {x}\n", .{bytes_with_zeros});
}
+test "padding" {
+ const foo: enum { foo } = .foo;
+ try testing.expectFmt("tag: |foo |\n", "tag: |{t:<4}|\n", .{foo});
+
+ const bar: error{bar} = error.bar;
+ try testing.expectFmt("error: |bar |\n", "error: |{t:<4}|\n", .{bar});
+}
+
test fixed {
{
var buf: [255]u8 = undefined;
@@ -2650,9 +2651,10 @@ test writeStruct {
}
test writeSliceEndian {
- var buffer: [4]u8 align(2) = undefined;
+ var buffer: [5]u8 align(2) = undefined;
var w: Writer = .fixed(&buffer);
+ try w.writeByte('x');
const array: [2]u16 = .{ 0x1234, 0x5678 };
try writeSliceEndian(&w, u16, &array, .big);
- try testing.expectEqualSlices(u8, &.{ 0x12, 0x34, 0x56, 0x78 }, &buffer);
+ try testing.expectEqualSlices(u8, &.{ 'x', 0x12, 0x34, 0x56, 0x78 }, &buffer);
}
diff --git a/lib/std/hash.zig b/lib/std/hash.zig
index 27107e1ddf..77a25550f3 100644
--- a/lib/std/hash.zig
+++ b/lib/std/hash.zig
@@ -31,8 +31,6 @@ pub const CityHash64 = cityhash.CityHash64;
const wyhash = @import("hash/wyhash.zig");
pub const Wyhash = wyhash.Wyhash;
-pub const RapidHash = @import("hash/RapidHash.zig");
-
const xxhash = @import("hash/xxhash.zig");
pub const XxHash3 = xxhash.XxHash3;
pub const XxHash64 = xxhash.XxHash64;
diff --git a/lib/std/hash/RapidHash.zig b/lib/std/hash/RapidHash.zig
deleted file mode 100644
index 030c570df9..0000000000
--- a/lib/std/hash/RapidHash.zig
+++ /dev/null
@@ -1,125 +0,0 @@
-const std = @import("std");
-
-const readInt = std.mem.readInt;
-const assert = std.debug.assert;
-const expect = std.testing.expect;
-const expectEqual = std.testing.expectEqual;
-
-const RAPID_SEED: u64 = 0xbdd89aa982704029;
-const RAPID_SECRET: [3]u64 = .{ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3 };
-
-pub fn hash(seed: u64, input: []const u8) u64 {
- const sc = RAPID_SECRET;
- const len = input.len;
- var a: u64 = 0;
- var b: u64 = 0;
- var k = input;
- var is: [3]u64 = .{ seed, 0, 0 };
-
- is[0] ^= mix(seed ^ sc[0], sc[1]) ^ len;
-
- if (len <= 16) {
- if (len >= 4) {
- const d: u64 = ((len & 24) >> @intCast(len >> 3));
- const e = len - 4;
- a = (r32(k) << 32) | r32(k[e..]);
- b = ((r32(k[d..]) << 32) | r32(k[(e - d)..]));
- } else if (len > 0)
- a = (@as(u64, k[0]) << 56) | (@as(u64, k[len >> 1]) << 32) | @as(u64, k[len - 1]);
- } else {
- var remain = len;
- if (len > 48) {
- is[1] = is[0];
- is[2] = is[0];
- while (remain >= 96) {
- inline for (0..6) |i| {
- const m1 = r64(k[8 * i * 2 ..]);
- const m2 = r64(k[8 * (i * 2 + 1) ..]);
- is[i % 3] = mix(m1 ^ sc[i % 3], m2 ^ is[i % 3]);
- }
- k = k[96..];
- remain -= 96;
- }
- if (remain >= 48) {
- inline for (0..3) |i| {
- const m1 = r64(k[8 * i * 2 ..]);
- const m2 = r64(k[8 * (i * 2 + 1) ..]);
- is[i] = mix(m1 ^ sc[i], m2 ^ is[i]);
- }
- k = k[48..];
- remain -= 48;
- }
-
- is[0] ^= is[1] ^ is[2];
- }
-
- if (remain > 16) {
- is[0] = mix(r64(k) ^ sc[2], r64(k[8..]) ^ is[0] ^ sc[1]);
- if (remain > 32) {
- is[0] = mix(r64(k[16..]) ^ sc[2], r64(k[24..]) ^ is[0]);
- }
- }
-
- a = r64(input[len - 16 ..]);
- b = r64(input[len - 8 ..]);
- }
-
- a ^= sc[1];
- b ^= is[0];
- mum(&a, &b);
- return mix(a ^ sc[0] ^ len, b ^ sc[1]);
-}
-
-test "RapidHash.hash" {
- const bytes: []const u8 = "abcdefgh" ** 128;
-
- const sizes: [13]u64 = .{ 0, 1, 2, 3, 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
-
- const outcomes: [13]u64 = .{
- 0x5a6ef77074ebc84b,
- 0xc11328477bc0f5d1,
- 0x5644ac035e40d569,
- 0x347080fbf5fcd81,
- 0x56b66b8dc802bcc,
- 0xb6bf9055973aac7c,
- 0xed56d62eead1e402,
- 0xc19072d767da8ffb,
- 0x89bb40a9928a4f0d,
- 0xe0af7c5e7b6e29fd,
- 0x9a3ed35fbedfa11a,
- 0x4c684b2119ca19fb,
- 0x4b575f5bf25600d6,
- };
-
- var success: bool = true;
- for (sizes, outcomes) |s, e| {
- const r = hash(RAPID_SEED, bytes[0..s]);
-
- expectEqual(e, r) catch |err| {
- std.debug.print("Failed on {d}: {!}\n", .{ s, err });
- success = false;
- };
- }
- try expect(success);
-}
-
-inline fn mum(a: *u64, b: *u64) void {
- const r = @as(u128, a.*) * b.*;
- a.* = @truncate(r);
- b.* = @truncate(r >> 64);
-}
-
-inline fn mix(a: u64, b: u64) u64 {
- var copy_a = a;
- var copy_b = b;
- mum(©_a, ©_b);
- return copy_a ^ copy_b;
-}
-
-inline fn r64(p: []const u8) u64 {
- return readInt(u64, p[0..8], .little);
-}
-
-inline fn r32(p: []const u8) u64 {
- return readInt(u32, p[0..4], .little);
-}
diff --git a/lib/std/hash/benchmark.zig b/lib/std/hash/benchmark.zig
index b899c9d429..33cb3432bf 100644
--- a/lib/std/hash/benchmark.zig
+++ b/lib/std/hash/benchmark.zig
@@ -59,12 +59,6 @@ const hashes = [_]Hash{
.ty = hash.crc.Crc32,
.name = "crc32",
},
- Hash{
- .ty = hash.RapidHash,
- .name = "rapidhash",
- .has_iterative_api = false,
- .init_u64 = 0,
- },
Hash{
.ty = hash.CityHash32,
.name = "cityhash-32",
diff --git a/lib/std/json.zig b/lib/std/json.zig
index 246c98817e..f81ac1cd65 100644
--- a/lib/std/json.zig
+++ b/lib/std/json.zig
@@ -44,7 +44,7 @@ test Value {
test Stringify {
var out: std.io.Writer.Allocating = .init(testing.allocator);
var write_stream: Stringify = .{
- .writer = &out.interface,
+ .writer = &out.writer,
.options = .{ .whitespace = .indent_2 },
};
defer out.deinit();
@@ -66,18 +66,18 @@ pub const Value = @import("json/dynamic.zig").Value;
pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap;
-pub const validate = @import("json/scanner.zig").validate;
-pub const Error = @import("json/scanner.zig").Error;
-pub const reader = @import("json/scanner.zig").reader;
-pub const default_buffer_size = @import("json/scanner.zig").default_buffer_size;
-pub const Token = @import("json/scanner.zig").Token;
-pub const TokenType = @import("json/scanner.zig").TokenType;
-pub const Diagnostics = @import("json/scanner.zig").Diagnostics;
-pub const AllocWhen = @import("json/scanner.zig").AllocWhen;
-pub const default_max_value_len = @import("json/scanner.zig").default_max_value_len;
-pub const Reader = @import("json/scanner.zig").Reader;
-pub const Scanner = @import("json/scanner.zig").Scanner;
-pub const isNumberFormattedLikeAnInteger = @import("json/scanner.zig").isNumberFormattedLikeAnInteger;
+pub const Scanner = @import("json/Scanner.zig");
+pub const validate = Scanner.validate;
+pub const Error = Scanner.Error;
+pub const reader = Scanner.reader;
+pub const default_buffer_size = Scanner.default_buffer_size;
+pub const Token = Scanner.Token;
+pub const TokenType = Scanner.TokenType;
+pub const Diagnostics = Scanner.Diagnostics;
+pub const AllocWhen = Scanner.AllocWhen;
+pub const default_max_value_len = Scanner.default_max_value_len;
+pub const Reader = Scanner.Reader;
+pub const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger;
pub const ParseOptions = @import("json/static.zig").ParseOptions;
pub const Parsed = @import("json/static.zig").Parsed;
@@ -101,10 +101,10 @@ pub fn fmt(value: anytype, options: Stringify.Options) Formatter(@TypeOf(value))
test fmt {
const expectFmt = std.testing.expectFmt;
- try expectFmt("123", "{}", .{fmt(@as(u32, 123), .{})});
+ try expectFmt("123", "{f}", .{fmt(@as(u32, 123), .{})});
try expectFmt(
\\{"num":927,"msg":"hello","sub":{"mybool":true}}
- , "{}", .{fmt(struct {
+ , "{f}", .{fmt(struct {
num: u32,
msg: []const u8,
sub: struct {
@@ -123,14 +123,7 @@ pub fn Formatter(comptime T: type) type {
value: T,
options: Stringify.Options,
- pub fn format(
- self: @This(),
- comptime fmt_spec: []const u8,
- options: std.fmt.FormatOptions,
- writer: *std.io.Writer,
- ) !void {
- comptime std.debug.assert(fmt_spec.len == 0);
- _ = options;
+ pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void {
try Stringify.value(self.value, self.options, writer);
}
};
@@ -138,7 +131,7 @@ pub fn Formatter(comptime T: type) type {
test {
_ = @import("json/test.zig");
- _ = @import("json/scanner.zig");
+ _ = Scanner;
_ = @import("json/dynamic.zig");
_ = @import("json/hashmap.zig");
_ = @import("json/static.zig");
diff --git a/lib/std/json/Scanner.zig b/lib/std/json/Scanner.zig
new file mode 100644
index 0000000000..b9c3c506a5
--- /dev/null
+++ b/lib/std/json/Scanner.zig
@@ -0,0 +1,1767 @@
+//! The lowest level parsing API in this package;
+//! supports streaming input with a low memory footprint.
+//! The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input.
+//! Specifically `d/8` bytes are required for this purpose,
+//! with some extra buffer according to the implementation of `std.ArrayList`.
+//!
+//! This scanner can emit partial tokens; see `std.json.Token`.
+//! The input to this class is a sequence of input buffers that you must supply one at a time.
+//! Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned.
+//! Then call `feedInput()` again and so forth.
+//! Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`,
+//! or when `error.BufferUnderrun` requests more data and there is no more.
+//! Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned.
+//!
+//! Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259
+//! * RFC 8259 requires JSON documents be valid UTF-8,
+//! but makes an allowance for systems that are "part of a closed ecosystem".
+//! I have no idea what that's supposed to mean in the context of a standard specification.
+//! This implementation requires inputs to be valid UTF-8.
+//! * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits,
+//! but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed.
+//! (RFC 5234 defines HEXDIG to only allow uppercase.)
+//! * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value".
+//! See http://www.unicode.org/glossary/#unicode_scalar_value .
+//! * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences,
+//! but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?),
+//! which would mean that unpaired surrogate halves are forbidden.
+//! By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to)
+//! explicitly allows unpaired surrogate halves.
+//! This implementation forbids unpaired surrogate halves in \u sequences.
+//! If a high surrogate half appears in a \u sequence,
+//! then a low surrogate half must immediately follow in \u notation.
+//! * RFC 8259 allows implementations to "accept non-JSON forms or extensions".
+//! This implementation does not accept any of that.
+//! * RFC 8259 allows implementations to put limits on "the size of texts",
+//! "the maximum depth of nesting", "the range and precision of numbers",
+//! and "the length and character contents of strings".
+//! This low-level implementation does not limit these,
+//! except where noted above, and except that nesting depth requires memory allocation.
+//! Note that this low-level API does not interpret numbers numerically,
+//! but simply emits their source form for some higher level code to make sense of.
+//! * This low-level implementation allows duplicate object keys,
+//! and key/value pairs are emitted in the order they appear in the input.
+
+const Scanner = @This();
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const assert = std.debug.assert;
+const BitStack = std.BitStack;
+
+state: State = .value,
+string_is_object_key: bool = false,
+stack: BitStack,
+value_start: usize = undefined,
+utf16_code_units: [2]u16 = undefined,
+
+input: []const u8 = "",
+cursor: usize = 0,
+is_end_of_input: bool = false,
+diagnostics: ?*Diagnostics = null,
+
+/// The allocator is only used to track `[]` and `{}` nesting levels.
+pub fn initStreaming(allocator: Allocator) @This() {
+ return .{
+ .stack = BitStack.init(allocator),
+ };
+}
+/// Use this if your entire input is available as a single slice.
+/// This is effectively equivalent to:
+/// ```
+/// initStreaming(allocator);
+/// feedInput(complete_input);
+/// endInput();
+/// ```
+pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() {
+ return .{
+ .stack = BitStack.init(allocator),
+ .input = complete_input,
+ .is_end_of_input = true, // the same flag that `endInput()` sets
+ };
+}
+pub fn deinit(self: *@This()) void {
+ self.stack.deinit(); // release the nesting-level stack
+ self.* = undefined; // the scanner must not be used after this point
+}
+
+pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
+ diagnostics.cursor_pointer = &self.cursor; // points into `self`, so `self` must stay at a stable address afterwards
+ self.diagnostics = diagnostics;
+}
+
+/// Call this with the first input slice, and again whenever you get `error.BufferUnderrun` from `next()`.
+/// When there is no more input to provide, call `endInput()`.
+pub fn feedInput(self: *@This(), input: []const u8) void {
+ assert(self.cursor == self.input.len); // The previous input slice must be fully consumed first.
+ if (self.diagnostics) |diag| {
+ diag.total_bytes_before_current_input += self.input.len;
+ // This usually wraps around ("goes negative") to measure how far before the beginning
+ // of the new buffer the current line started.
+ diag.line_start_cursor -%= self.cursor;
+ }
+ self.input = input;
+ self.cursor = 0;
+ self.value_start = 0;
+}
+/// Call this when you will not call `feedInput()` anymore.
+/// This can be called either immediately after the last `feedInput()`,
+/// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`.
+/// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`.
+pub fn endInput(self: *@This()) void {
+ self.is_end_of_input = true;
+}
+
+pub const NextError = Error || Allocator.Error || error{BufferUnderrun};
+pub const AllocError = Error || Allocator.Error || error{ValueTooLong};
+pub const PeekError = Error || error{BufferUnderrun};
+pub const SkipError = Error || Allocator.Error;
+pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun};
+
+/// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
+/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+/// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
+ return self.nextAllocMax(allocator, when, default_max_value_len);
+}
+
+/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+/// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
+ assert(self.is_end_of_input); // This function is not available in streaming mode.
+ const token_type = self.peekNextTokenType() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable, // all input has already been supplied (asserted above)
+ else => |err| return err,
+ };
+ switch (token_type) {
+ .number, .string => {
+ var value_list = ArrayList(u8).init(allocator);
+ errdefer {
+ value_list.deinit();
+ }
+ if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) |slice| { // non-null: the value is returned as a slice and `value_list` was not used
+ return if (token_type == .number)
+ Token{ .number = slice }
+ else
+ Token{ .string = slice };
+ } else { // null: the value was written into `value_list`; its memory is handed to the caller
+ return if (token_type == .number)
+ Token{ .allocated_number = try value_list.toOwnedSlice() }
+ else
+ Token{ .allocated_string = try value_list.toOwnedSlice() };
+ }
+ },
+
+ // Simple tokens never alloc.
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => return self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ },
+ }
+}
+
+/// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
+pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 {
+ return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len);
+}
+/// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`.
+/// When allocation is not necessary with `.alloc_if_needed`,
+/// this method returns the content slice from the input buffer, and `value_list` is not touched.
+/// When allocation is necessary or with `.alloc_always`, this method concatenates partial tokens into the given `value_list`,
+/// and returns `null` once the final `.number` or `.string` token has been written into it.
+/// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list.
+/// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation
+/// can be resumed by passing the same array list in again.
+/// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type;
+/// the caller of this method is expected to know which type of token is being processed.
+pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 {
+ while (true) {
+ const token = try self.next();
+ switch (token) {
+ // Accumulate partial values.
+ .partial_number, .partial_string => |slice| {
+ try appendSlice(value_list, slice, max_value_len);
+ },
+ .partial_string_escaped_1 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_2 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_3 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_4 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+
+ // Return complete values.
+ .number => |slice| {
+ if (when == .alloc_if_needed and value_list.items.len == 0) {
+ // No alloc necessary.
+ return slice;
+ }
+ try appendSlice(value_list, slice, max_value_len);
+ // The token is complete.
+ return null;
+ },
+ .string => |slice| {
+ if (when == .alloc_if_needed and value_list.items.len == 0) {
+ // No alloc necessary.
+ return slice;
+ }
+ try appendSlice(value_list, slice, max_value_len);
+ // The token is complete.
+ return null;
+ },
+
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this.
+
+ .allocated_number, .allocated_string => unreachable,
+ }
+ }
+}
+
+/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+/// If the next token type is `.object_begin` or `.array_begin`,
+/// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found.
+/// If the next token type is `.number` or `.string`,
+/// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found.
+/// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once.
+/// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`;
+/// see `peekNextTokenType()`.
+pub fn skipValue(self: *@This()) SkipError!void {
+ assert(self.is_end_of_input); // This function is not available in streaming mode.
+ switch (self.peekNextTokenType() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) {
+ .object_begin, .array_begin => {
+ self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) { // the opening token was only peeked, so this is the height just outside the container
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ },
+ .number, .string => {
+ while (true) {
+ switch (self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) {
+ .partial_number,
+ .partial_string,
+ .partial_string_escaped_1,
+ .partial_string_escaped_2,
+ .partial_string_escaped_3,
+ .partial_string_escaped_4,
+ => continue,
+
+ .number, .string => break,
+
+ else => unreachable,
+ }
+ }
+ },
+ .true, .false, .null => {
+ _ = self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ },
+
+ .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+ }
+}
+
+/// Calls `next()` until an `.object_end` or `.array_end` token leaves `stackHeight()`
+/// equal to `terminal_stack_height`. Reaching `.end_of_document` first is asserted unreachable.
+/// Unlike `skipValue()`, this function is available in streaming mode.
+pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void {
+    while (true) {
+        const token = try self.next();
+        switch (token) {
+            .end_of_document => unreachable,
+            .object_end, .array_end => {
+                if (self.stackHeight() == terminal_stack_height) return;
+            },
+            else => {},
+        }
+    }
+}
+
+/// Reports how many `{}` or `[]` nesting levels are open at the current position.
+pub fn stackHeight(self: *const @This()) usize {
+    const open_levels = self.stack.bit_len;
+    return open_levels;
+}
+
+/// Reserves memory for `height` nesting levels up front,
+/// so that a `stackHeight()` of up to `height` will not cause allocations.
+pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void {
+    return self.stack.ensureTotalCapacity(height);
+}
+
+/// Scans forward from the cursor and returns the next token. See `std.json.Token` for documentation of this function.
+pub fn next(self: *@This()) NextError!Token {
+ state_loop: while (true) { // Each pass handles one state; `continue :state_loop` re-dispatches after a state change without returning a token.
+ switch (self.state) {
+ .value => { // Expecting the first byte of a value.
+ switch (try self.skipWhitespaceExpectByte()) {
+ // Object, Array
+ '{' => {
+ try self.stack.push(OBJECT_MODE);
+ self.cursor += 1;
+ self.state = .object_start;
+ return .object_begin;
+ },
+ '[' => {
+ try self.stack.push(ARRAY_MODE);
+ self.cursor += 1;
+ self.state = .array_start;
+ return .array_begin;
+ },
+
+ // String
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor; // The value slice begins after the opening quote.
+ self.state = .string;
+ continue :state_loop;
+ },
+
+ // Number
+ '1'...'9' => {
+ self.value_start = self.cursor; // The value slice includes the first byte of the number.
+ self.cursor += 1;
+ self.state = .number_int;
+ continue :state_loop;
+ },
+ '0' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_leading_zero;
+ continue :state_loop;
+ },
+ '-' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_minus;
+ continue :state_loop;
+ },
+
+ // literal values
+ 't' => {
+ self.cursor += 1;
+ self.state = .literal_t;
+ continue :state_loop;
+ },
+ 'f' => {
+ self.cursor += 1;
+ self.state = .literal_f;
+ continue :state_loop;
+ },
+ 'n' => {
+ self.cursor += 1;
+ self.state = .literal_n;
+ continue :state_loop;
+ },
+
+ else => return error.SyntaxError,
+ }
+ },
+
+ .post_value => { // A value or object key just ended: expect ':', ',', a closing bracket, or the end of input.
+ if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
+
+ const c = self.input[self.cursor];
+ if (self.string_is_object_key) { // The string just scanned was an object key, so only ':' may follow.
+ self.string_is_object_key = false;
+ switch (c) {
+ ':' => {
+ self.cursor += 1;
+ self.state = .value;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ }
+
+ switch (c) {
+ '}' => {
+ if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError; // '}' while the innermost container is an array.
+ self.cursor += 1;
+ // stay in .post_value state.
+ return .object_end;
+ },
+ ']' => {
+ if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError; // ']' while the innermost container is an object.
+ self.cursor += 1;
+ // stay in .post_value state.
+ return .array_end;
+ },
+ ',' => {
+ switch (self.stack.peek()) { // What may follow the comma depends on the innermost container.
+ OBJECT_MODE => {
+ self.state = .object_post_comma;
+ },
+ ARRAY_MODE => {
+ self.state = .value;
+ },
+ }
+ self.cursor += 1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .object_start => { // Just after '{': either the first key or an immediate '}'.
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ self.string_is_object_key = true;
+ continue :state_loop;
+ },
+ '}' => {
+ self.cursor += 1;
+ _ = self.stack.pop();
+ self.state = .post_value;
+ return .object_end;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .object_post_comma => { // After ',' inside an object: only a key may follow.
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ self.string_is_object_key = true;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .array_start => { // Just after '[': either an immediate ']' or the first element.
+ switch (try self.skipWhitespaceExpectByte()) {
+ ']' => {
+ self.cursor += 1;
+ _ = self.stack.pop();
+ self.state = .post_value;
+ return .array_end;
+ },
+ else => {
+ self.state = .value; // Let the .value state validate and consume the byte.
+ continue :state_loop;
+ },
+ }
+ },
+
+ .number_minus => { // After '-': a digit is required.
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); // `false`: the number may not end here.
+ switch (self.input[self.cursor]) {
+ '0' => {
+ self.cursor += 1;
+ self.state = .number_leading_zero;
+ continue :state_loop;
+ },
+ '1'...'9' => {
+ self.cursor += 1;
+ self.state = .number_int;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_leading_zero => { // After a leading '0': only '.', 'e' or 'E' continue the number; any other byte ends it.
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true); // `true`: the number may end here.
+ switch (self.input[self.cursor]) {
+ '.' => {
+ self.cursor += 1;
+ self.state = .number_post_dot;
+ continue :state_loop;
+ },
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ },
+ .number_int => { // Consuming the digits of the integer part.
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ '.' => {
+ self.cursor += 1;
+ self.state = .number_post_dot;
+ continue :state_loop;
+ },
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value; // The terminating byte is not consumed; .post_value inspects it.
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+ .number_post_dot => { // After '.': at least one digit is required.
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_frac;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_frac => { // Consuming the digits of the fraction part.
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+ .number_post_e => { // After 'e' / 'E': a sign or a digit is required.
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_exp;
+ continue :state_loop;
+ },
+ '+', '-' => {
+ self.cursor += 1;
+ self.state = .number_post_e_sign;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_post_e_sign => { // After the exponent sign: a digit is required.
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_exp;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_exp => { // Consuming the digits of the exponent.
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+
+ .string => { // Inside a string, scanning its content.
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.
+
+ // ASCII plain text.
+ 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,
+
+ // Special characters.
+ '"' => {
+ const result = Token{ .string = self.takeValueSlice() }; // Taken before advancing, so the closing quote is excluded.
+ self.cursor += 1;
+ self.state = .post_value;
+ return result;
+ },
+ '\\' => {
+ const slice = self.takeValueSlice(); // Content scanned before the backslash.
+ self.cursor += 1;
+ self.state = .string_backslash;
+ if (slice.len > 0) return Token{ .partial_string = slice }; // The escape itself is handled by the next call.
+ continue :state_loop;
+ },
+
+ // UTF-8 validation.
+ // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+ 0xC2...0xDF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ 0xE0 => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
+ continue :state_loop;
+ },
+ 0xE1...0xEC, 0xEE...0xEF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ 0xED => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
+ continue :state_loop;
+ },
+ 0xF0 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
+ continue :state_loop;
+ },
+ 0xF1...0xF3 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte;
+ continue :state_loop;
+ },
+ 0xF4 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
+ continue :state_loop;
+ },
+ 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
+ }
+ }
+ if (self.is_end_of_input) return error.UnexpectedEndOfInput; // The string was never closed.
+ const slice = self.takeValueSlice();
+ if (slice.len > 0) return Token{ .partial_string = slice }; // Hand over what was scanned so far in this buffer.
+ return error.BufferUnderrun;
+ },
+ .string_backslash => { // After a backslash: expecting the escape character.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ '"', '\\', '/' => {
+ // Since these characters now represent themselves literally,
+ // we can simply begin the next plaintext slice here.
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .string;
+ continue :state_loop;
+ },
+ 'b' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
+ },
+ 'f' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
+ },
+ 'n' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
+ },
+ 'r' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
+ },
+ 't' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
+ },
+ 'u' => {
+ self.cursor += 1;
+ self.state = .string_backslash_u;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .string_backslash_u => { // First hex digit of a \uXXXX escape (bits 12..15 of the code unit).
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] = @as(u16, c - '0') << 12;
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_1;
+ continue :state_loop;
+ },
+ .string_backslash_u_1 => { // Second hex digit (bits 8..11).
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] |= @as(u16, c - '0') << 8;
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_2;
+ continue :state_loop;
+ },
+ .string_backslash_u_2 => { // Third hex digit (bits 4..7).
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] |= @as(u16, c - '0') << 4;
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_3;
+ continue :state_loop;
+ },
+ .string_backslash_u_3 => { // Fourth hex digit (bits 0..3); the code unit is complete after this.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] |= c - '0';
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] |= c - 'A' + 10;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] |= c - 'a' + 10;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) { // A second \uXXXX escape holding the low half must follow.
+ self.state = .string_surrogate_half;
+ continue :state_loop;
+ } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) {
+ return error.SyntaxError; // Unexpected low surrogate half.
+ } else {
+ self.value_start = self.cursor;
+ self.state = .string;
+ return partialStringCodepoint(self.utf16_code_units[0]);
+ }
+ },
+ .string_surrogate_half => { // After a high surrogate escape: expecting the backslash of the low surrogate escape.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ '\\' => {
+ self.cursor += 1;
+ self.state = .string_surrogate_half_backslash;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .string_surrogate_half_backslash_u;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 'D', 'd' => { // Only D is accepted as the first hex digit of the low half.
+ self.cursor += 1;
+ self.utf16_code_units[1] = 0xD << 12;
+ self.state = .string_surrogate_half_backslash_u_1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u_1 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ 'C'...'F' => { // Only C..F is accepted as the second hex digit of the low half.
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8;
+ self.state = .string_surrogate_half_backslash_u_2;
+ continue :state_loop;
+ },
+ 'c'...'f' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8;
+ self.state = .string_surrogate_half_backslash_u_2;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u_2 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - '0') << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ 'A'...'F' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ 'a'...'f' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .string_surrogate_half_backslash_u_3 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[1] |= c - '0';
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[1] |= c - 'A' + 10;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[1] |= c - 'a' + 10;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable; // The states above only let a high half followed by a D[C-F]xx half get here.
+ return partialStringCodepoint(code_point);
+ },
+
+ .string_utf8_last_byte => { // Expecting the final continuation byte of a multi-byte UTF-8 sequence.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte_guard_against_overlong => { // Entered after lead byte 0xE0.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0xA0...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half => { // Entered after lead byte 0xED.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0x9F => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte_guard_against_overlong => { // Entered after lead byte 0xF0.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x90...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte_guard_against_too_large => { // Entered after lead byte 0xF4.
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0x8F => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+
+ .literal_t => { // The literal_* states match the rest of `true`, `false` and `null` one byte per state.
+ switch (try self.expectByte()) {
+ 'r' => {
+ self.cursor += 1;
+ self.state = .literal_tr;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_tr => {
+ switch (try self.expectByte()) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .literal_tru;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_tru => {
+ switch (try self.expectByte()) {
+ 'e' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .true;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_f => {
+ switch (try self.expectByte()) {
+ 'a' => {
+ self.cursor += 1;
+ self.state = .literal_fa;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fa => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .literal_fal;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fal => {
+ switch (try self.expectByte()) {
+ 's' => {
+ self.cursor += 1;
+ self.state = .literal_fals;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fals => {
+ switch (try self.expectByte()) {
+ 'e' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .false;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_n => {
+ switch (try self.expectByte()) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .literal_nu;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_nu => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .literal_nul;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_nul => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .null;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ }
+ unreachable; // Every prong above either returns or continues the loop.
+ }
+}
+
+/// Seeks ahead in the input until the first byte of the next token (or the end of the input)
+/// determines which type of token will be returned from the next `next*()` call, and returns that type.
+/// This function is idempotent, only advancing past commas, colons, and inter-token whitespace.
+pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
+ state_loop: while (true) { // Loops only to re-dispatch after a ':' or ',' has been consumed.
+ switch (self.state) {
+ .value => {
+ switch (try self.skipWhitespaceExpectByte()) { // The deciding byte is inspected but not consumed.
+ '{' => return .object_begin,
+ '[' => return .array_begin,
+ '"' => return .string,
+ '-', '0'...'9' => return .number,
+ 't' => return .true,
+ 'f' => return .false,
+ 'n' => return .null,
+ else => return error.SyntaxError,
+ }
+ },
+
+ .post_value => {
+ if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
+
+ const c = self.input[self.cursor];
+ if (self.string_is_object_key) { // An object key was just scanned: consume the ':' and look at the value after it.
+ self.string_is_object_key = false;
+ switch (c) {
+ ':' => {
+ self.cursor += 1;
+ self.state = .value;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ }
+
+ switch (c) {
+ '}' => return .object_end, // Closing brackets are reported without being consumed.
+ ']' => return .array_end,
+ ',' => { // Consume the comma, then look at whatever follows it.
+ switch (self.stack.peek()) {
+ OBJECT_MODE => {
+ self.state = .object_post_comma;
+ },
+ ARRAY_MODE => {
+ self.state = .value;
+ },
+ }
+ self.cursor += 1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .object_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => return .string,
+ '}' => return .object_end,
+ else => return error.SyntaxError,
+ }
+ },
+ .object_post_comma => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => return .string,
+ else => return error.SyntaxError,
+ }
+ },
+
+ .array_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ ']' => return .array_end,
+ else => {
+ self.state = .value; // Let the .value state classify the byte.
+ continue :state_loop;
+ },
+ }
+ },
+
+ .number_minus, // From here on the scanner is in the middle of a token, so that token's type is reported.
+ .number_leading_zero,
+ .number_int,
+ .number_post_dot,
+ .number_frac,
+ .number_post_e,
+ .number_post_e_sign,
+ .number_exp,
+ => return .number,
+
+ .string,
+ .string_backslash,
+ .string_backslash_u,
+ .string_backslash_u_1,
+ .string_backslash_u_2,
+ .string_backslash_u_3,
+ .string_surrogate_half,
+ .string_surrogate_half_backslash,
+ .string_surrogate_half_backslash_u,
+ .string_surrogate_half_backslash_u_1,
+ .string_surrogate_half_backslash_u_2,
+ .string_surrogate_half_backslash_u_3,
+ => return .string,
+
+ .string_utf8_last_byte,
+ .string_utf8_second_to_last_byte,
+ .string_utf8_second_to_last_byte_guard_against_overlong,
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half,
+ .string_utf8_third_to_last_byte,
+ .string_utf8_third_to_last_byte_guard_against_overlong,
+ .string_utf8_third_to_last_byte_guard_against_too_large,
+ => return .string,
+
+ .literal_t,
+ .literal_tr,
+ .literal_tru,
+ => return .true,
+ .literal_f,
+ .literal_fa,
+ .literal_fal,
+ .literal_fals,
+ => return .false,
+ .literal_n,
+ .literal_nu,
+ .literal_nul,
+ => return .null,
+ }
+ unreachable; // Every prong above either returns or continues the loop.
+ }
+}
+
+const State = enum { // Where the scanner is within the JSON grammar; `next()` and `peekNextTokenType()` switch on it.
+ value, // Expecting the first byte of a value.
+ post_value, // Just after a complete value or object key.
+
+ object_start, // Just after '{'.
+ object_post_comma, // Just after ',' inside an object.
+
+ array_start, // Just after '['.
+
+ number_minus, // After '-'.
+ number_leading_zero, // After a leading '0'.
+ number_int, // Within the integer digits.
+ number_post_dot, // After '.'.
+ number_frac, // Within the fraction digits.
+ number_post_e, // After 'e' / 'E'.
+ number_post_e_sign, // After the exponent sign.
+ number_exp, // Within the exponent digits.
+
+ string, // Within string content.
+ string_backslash, // After a backslash.
+ string_backslash_u, // After `\u`; the numbered states below count hex digits already consumed.
+ string_backslash_u_1,
+ string_backslash_u_2,
+ string_backslash_u_3,
+ string_surrogate_half, // After a `\uXXXX` high surrogate; a `\uXXXX` low surrogate must follow.
+ string_surrogate_half_backslash,
+ string_surrogate_half_backslash_u,
+ string_surrogate_half_backslash_u_1,
+ string_surrogate_half_backslash_u_2,
+ string_surrogate_half_backslash_u_3,
+
+ // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+ string_utf8_last_byte, // State A
+ string_utf8_second_to_last_byte, // State B
+ string_utf8_second_to_last_byte_guard_against_overlong, // State C
+ string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
+ string_utf8_third_to_last_byte, // State E
+ string_utf8_third_to_last_byte_guard_against_overlong, // State F
+ string_utf8_third_to_last_byte_guard_against_too_large, // State G
+
+ literal_t, // The literal_* names spell the bytes of `true` / `false` / `null` consumed so far.
+ literal_tr,
+ literal_tru,
+ literal_f,
+ literal_fa,
+ literal_fal,
+ literal_fals,
+ literal_n,
+ literal_nu,
+ literal_nul,
+};
+
+/// Returns the byte under the cursor without consuming it.
+/// When the current input has no byte left, fails with `error.UnexpectedEndOfInput`
+/// if the end of input has been signaled, and with `error.BufferUnderrun` otherwise.
+fn expectByte(self: *const @This()) !u8 {
+    if (self.cursor >= self.input.len) {
+        // No byte.
+        if (!self.is_end_of_input) return error.BufferUnderrun;
+        return error.UnexpectedEndOfInput;
+    }
+    return self.input[self.cursor];
+}
+
+/// Advances the cursor past spaces, tabs, carriage returns and newlines,
+/// stopping at the first other byte or at the end of the current input.
+/// When diagnostics are attached, each newline updates the line bookkeeping.
+fn skipWhitespace(self: *@This()) void {
+    while (self.cursor < self.input.len) {
+        const byte = self.input[self.cursor];
+        if (byte == '\n') {
+            if (self.diagnostics) |diag| {
+                diag.line_number += 1;
+                // The recorded position is that of the newline itself,
+                // so a straight-forward subtraction gives a 1-based column number.
+                diag.line_start_cursor = self.cursor;
+            }
+        } else if (byte != ' ' and byte != '\t' and byte != '\r') {
+            return;
+        }
+        self.cursor += 1;
+    }
+}
+
+/// Skips whitespace, then returns the byte under the cursor without consuming it.
+/// Fails the same way `expectByte()` does when no byte is available.
+fn skipWhitespaceExpectByte(self: *@This()) !u8 {
+    self.skipWhitespace();
+    const byte = try self.expectByte();
+    return byte;
+}
+
+/// Skips whitespace and reports whether the document has ended.
+/// Returns `true` when the input is over and no container is open,
+/// and `false` when another byte is available inside a container.
+/// Errors:
+/// * `error.SyntaxError`: a byte is available but no container is open.
+/// * `error.UnexpectedEndOfInput`: the input is over while a container is still open.
+/// * `error.BufferUnderrun`: the current input is exhausted but the end of input has not been signaled.
+fn skipWhitespaceCheckEnd(self: *@This()) !bool {
+    self.skipWhitespace();
+    const at_top_level = self.stackHeight() == 0;
+
+    if (self.cursor < self.input.len) {
+        if (at_top_level) return error.SyntaxError;
+        return false;
+    }
+
+    // End of buffer.
+    if (!self.is_end_of_input) return error.BufferUnderrun;
+    // End of everything.
+    if (!at_top_level) return error.UnexpectedEndOfInput;
+    return true;
+}
+
+/// Returns the input bytes from `value_start` up to (not including) the cursor,
+/// then moves `value_start` to the cursor so the next slice starts there.
+fn takeValueSlice(self: *@This()) []const u8 {
+    defer self.value_start = self.cursor;
+    return self.input[self.value_start..self.cursor];
+}
+/// Like `takeValueSlice()`, but leaves the last `trailing_negative_offset` bytes before the cursor out of the slice.
+/// Returns "" (and leaves `value_start` alone) when nothing precedes those trailing bytes in the current input.
+fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 {
+    // Phrased as an addition to avoid unsigned underflow in the subtraction below.
+    // This also covers an escape sequence that started before the current input buffer.
+    const first_excluded_limit = self.value_start + trailing_negative_offset;
+    if (self.cursor <= first_excluded_limit) return "";
+
+    const slice_end = self.cursor - trailing_negative_offset;
+    const slice = self.input[self.value_start..slice_end];
+    // When trailing_negative_offset is non-zero, this assignment doesn't matter,
+    // because value_start is always set again while emitting the .partial_string_escaped_*.
+    self.value_start = self.cursor;
+    return slice;
+}
+
+/// Handles running out of buffered input while scanning a number.
+/// `allow_end` says whether the number may legally end at this point.
+/// * More input may follow: returns the bytes scanned so far as `.partial_number`,
+///   or `error.BufferUnderrun` if there are none.
+/// * Input has ended: returns the final `.number` and moves to the `.post_value` state,
+///   or fails with `error.UnexpectedEndOfInput` when `allow_end` is false.
+fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
+    const slice = self.takeValueSlice();
+    if (!self.is_end_of_input) {
+        if (slice.len == 0) return error.BufferUnderrun;
+        return Token{ .partial_number = slice };
+    }
+    if (!allow_end) return error.UnexpectedEndOfInput;
+    self.state = .post_value;
+    return Token{ .number = slice };
+}
+
+/// Handles running out of buffered input in the middle of a string.
+/// Fails with `error.UnexpectedEndOfInput` when the input has ended.
+/// Otherwise returns the plain content scanned so far as `.partial_string`,
+/// leaving out any escape sequence that is still incomplete,
+/// or `error.BufferUnderrun` when there is no such content.
+fn endOfBufferInString(self: *@This()) !Token {
+    if (self.is_end_of_input) return error.UnexpectedEndOfInput;
+
+    // Number of bytes of an unfinished escape sequence sitting just before the cursor.
+    const pending_escape_len: usize = switch (self.state) {
+        // Don't include the escape sequence in the partial string.
+        .string_backslash => 1,
+        .string_backslash_u => 2,
+        .string_backslash_u_1 => 3,
+        .string_backslash_u_2 => 4,
+        .string_backslash_u_3 => 5,
+        .string_surrogate_half => 6,
+        .string_surrogate_half_backslash => 7,
+        .string_surrogate_half_backslash_u => 8,
+        .string_surrogate_half_backslash_u_1 => 9,
+        .string_surrogate_half_backslash_u_2 => 10,
+        .string_surrogate_half_backslash_u_3 => 11,
+
+        // Include everything up to the cursor otherwise.
+        .string,
+        .string_utf8_last_byte,
+        .string_utf8_second_to_last_byte,
+        .string_utf8_second_to_last_byte_guard_against_overlong,
+        .string_utf8_second_to_last_byte_guard_against_surrogate_half,
+        .string_utf8_third_to_last_byte,
+        .string_utf8_third_to_last_byte_guard_against_overlong,
+        .string_utf8_third_to_last_byte_guard_against_too_large,
+        => 0,
+
+        else => unreachable,
+    };
+
+    const slice = self.takeValueSliceMinusTrailingOffset(pending_escape_len);
+    if (slice.len == 0) return error.BufferUnderrun;
+    return Token{ .partial_string = slice };
+}
+
+/// Encodes `code_point` as UTF-8 and wraps the bytes in the
+/// `.partial_string_escaped_N` token whose N matches the encoded length (1 to 4).
+/// The caller must pass a code point that `std.unicode.utf8Encode` accepts.
+fn partialStringCodepoint(code_point: u21) Token {
+    var buf: [4]u8 = undefined;
+    const encoded_len = std.unicode.utf8Encode(code_point, &buf) catch unreachable;
+    return switch (encoded_len) {
+        1 => Token{ .partial_string_escaped_1 = buf[0..1].* },
+        2 => Token{ .partial_string_escaped_2 = buf[0..2].* },
+        3 => Token{ .partial_string_escaped_3 = buf[0..3].* },
+        4 => Token{ .partial_string_escaped_4 = buf[0..4].* },
+        else => unreachable,
+    };
+}
+
+/// Scans all of `s` and reports whether it is well-formed JSON.
+/// Returns `false` on `SyntaxError` or `UnexpectedEndOfInput`, and `true` once `.end_of_document` is reached.
+/// Allocator errors are returned as-is; they are unlikely,
+/// but can be caused by extreme nesting depth in the input.
+pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool {
+    var scanner = Scanner.initCompleteInput(allocator, s);
+    defer scanner.deinit();
+
+    while (true) {
+        const token = scanner.next() catch |err| switch (err) {
+            error.OutOfMemory => return error.OutOfMemory,
+            error.SyntaxError, error.UnexpectedEndOfInput => return false,
+            error.BufferUnderrun => unreachable,
+        };
+        switch (token) {
+            .end_of_document => return true,
+            else => {},
+        }
+    }
+}
+
+/// The parsing errors are divided into two categories:
+/// * `SyntaxError` is for clearly malformed JSON documents,
+/// such as giving an input document that isn't JSON at all.
+/// * `UnexpectedEndOfInput` is for signaling that everything's been
+/// valid so far, but the input appears to be truncated for some reason.
+/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`, not `SyntaxError`.
+pub const Error = error{ SyntaxError, UnexpectedEndOfInput };
+
+/// Default buffer size used by `json.reader`.
+pub const default_buffer_size = 0x1000; // 0x1000 == 4096.
+
+/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
+/// ```
+/// = .end_of_document
+/// =
+/// |