diff --git a/doc/langref.html.in b/doc/langref.html.in index dcf13e812d..e8189e5c42 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -7987,7 +7987,7 @@ AsmInput <- COLON AsmInputList AsmClobbers? AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN -AsmClobbers <- COLON StringList +AsmClobbers <- COLON Expr # *** Helper grammar *** BreakLabel <- COLON IDENTIFIER diff --git a/lib/compiler/resinator/main.zig b/lib/compiler/resinator/main.zig index 4c952c03c4..30e9c825bb 100644 --- a/lib/compiler/resinator/main.zig +++ b/lib/compiler/resinator/main.zig @@ -292,12 +292,14 @@ pub fn main() !void { }; defer depfile.close(); - const depfile_writer = depfile.deprecatedWriter(); - var depfile_buffered_writer = std.io.bufferedWriter(depfile_writer); + var depfile_buffer: [1024]u8 = undefined; + var depfile_writer = depfile.writer(&depfile_buffer); switch (options.depfile_fmt) { .json => { - var write_stream = std.json.writeStream(depfile_buffered_writer.writer(), .{ .whitespace = .indent_2 }); - defer write_stream.deinit(); + var write_stream: std.json.Stringify = .{ + .writer = &depfile_writer.interface, + .options = .{ .whitespace = .indent_2 }, + }; try write_stream.beginArray(); for (dependencies_list.items) |dep_path| { @@ -306,7 +308,7 @@ pub fn main() !void { try write_stream.endArray(); }, } - try depfile_buffered_writer.flush(); + try depfile_writer.interface.flush(); } } diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index a69066f09c..8b60a75399 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -10,10 +10,10 @@ pub const std_options: std.Options = .{ }; var log_err_count: usize = 0; -var fba_buffer: [8192]u8 = undefined; var fba = std.heap.FixedBufferAllocator.init(&fba_buffer); -var stdin_buffer: [std.heap.page_size_min]u8 align(std.heap.page_size_min) = undefined; -var stdout_buffer: [std.heap.page_size_min]u8 align(std.heap.page_size_min) = undefined; +var fba_buffer: 
[8192]u8 = undefined; +var stdin_buffer: [4096]u8 = undefined; +var stdout_buffer: [4096]u8 = undefined; const crippled = switch (builtin.zig_backend) { .stage2_powerpc, @@ -68,8 +68,8 @@ pub fn main() void { fn mainServer() !void { @disableInstrumentation(); - var stdin_reader = std.fs.File.stdin().reader(&stdin_buffer); - var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer); + var stdin_reader = std.fs.File.stdin().readerStreaming(&stdin_buffer); + var stdout_writer = std.fs.File.stdout().writerStreaming(&stdout_buffer); var server = try std.zig.Server.init(.{ .in = &stdin_reader.interface, .out = &stdout_writer.interface, @@ -104,7 +104,7 @@ fn mainServer() !void { defer testing.allocator.free(expected_panic_msgs); for (test_fns, names, expected_panic_msgs) |test_fn, *name, *expected_panic_msg| { - name.* = @as(u32, @intCast(string_bytes.items.len)); + name.* = @intCast(string_bytes.items.len); try string_bytes.ensureUnusedCapacity(testing.allocator, test_fn.name.len + 1); string_bytes.appendSliceAssumeCapacity(test_fn.name); string_bytes.appendAssumeCapacity(0); diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig index a0a58067fc..efd0f86105 100644 --- a/lib/std/Build/Cache/Path.zig +++ b/lib/std/Build/Cache/Path.zig @@ -161,17 +161,19 @@ pub fn formatEscapeString(path: Path, writer: *std.io.Writer) std.io.Writer.Erro } } +/// Deprecated, use double quoted escape to print paths. pub fn fmtEscapeChar(path: Path) std.fmt.Formatter(Path, formatEscapeChar) { return .{ .data = path }; } +/// Deprecated, use double quoted escape to print paths. 
pub fn formatEscapeChar(path: Path, writer: *std.io.Writer) std.io.Writer.Error!void { if (path.root_dir.path) |p| { - try std.zig.charEscape(p, writer); - if (path.sub_path.len > 0) try std.zig.charEscape(fs.path.sep_str, writer); + for (p) |byte| try std.zig.charEscape(byte, writer); + if (path.sub_path.len > 0) try writer.writeByte(fs.path.sep); } if (path.sub_path.len > 0) { - try std.zig.charEscape(path.sub_path, writer); + for (path.sub_path) |byte| try std.zig.charEscape(byte, writer); } } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index ceabed0cb4..68ebe0f8da 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -1122,10 +1122,12 @@ fn runCommand( // Wine's excessive stderr logging is only situationally helpful. Disable it by default, but // allow the user to override it (e.g. with `WINEDEBUG=err+all`) if desired. if (env_map.get("WINEDEBUG") == null) { - // We don't own `env_map` at this point, so turn it into a copy before modifying it. - env_map = arena.create(EnvMap) catch @panic("OOM"); - env_map.hash_map = try env_map.hash_map.cloneWithAllocator(arena); - try env_map.put("WINEDEBUG", "-all"); + // We don't own `env_map` at this point, so create a copy in order to modify it. 
+ const new_env_map = arena.create(EnvMap) catch @panic("OOM"); + new_env_map.hash_map = try env_map.hash_map.cloneWithAllocator(arena); + try new_env_map.put("WINEDEBUG", "-all"); + + env_map = new_env_map; } } else { return failForeign(run, "-fwine", argv[0], exe); @@ -1737,7 +1739,7 @@ fn sendMessage(file: std.fs.File, tag: std.zig.Client.Message.Tag) !void { .tag = tag, .bytes_len = 0, }; - try file.writeAll(std.mem.asBytes(&header)); + try file.writeAll(@ptrCast(&header)); } fn sendRunTestMessage(file: std.fs.File, tag: std.zig.Client.Message.Tag, index: u32) !void { diff --git a/lib/std/Io/Reader.zig b/lib/std/Io/Reader.zig index f2a1ec7287..f25e113522 100644 --- a/lib/std/Io/Reader.zig +++ b/lib/std/Io/Reader.zig @@ -990,9 +990,9 @@ pub fn discardDelimiterLimit(r: *Reader, delimiter: u8, limit: Limit) DiscardDel /// Returns `error.EndOfStream` if and only if there are fewer than `n` bytes /// remaining. /// -/// Asserts buffer capacity is at least `n`. +/// If the end of stream is not encountered, asserts buffer capacity is at +/// least `n`. pub fn fill(r: *Reader, n: usize) Error!void { - assert(n <= r.buffer.len); if (r.seek + n <= r.end) { @branchHint(.likely); return; @@ -1108,9 +1108,9 @@ pub fn takeVarInt(r: *Reader, comptime Int: type, endian: std.builtin.Endian, n: /// Asserts the buffer was initialized with a capacity at least `@sizeOf(T)`. /// /// See also: -/// * `peekStructReference` +/// * `peekStructPointer` /// * `takeStruct` -pub fn takeStructReference(r: *Reader, comptime T: type) Error!*align(1) T { +pub fn takeStructPointer(r: *Reader, comptime T: type) Error!*align(1) T { // Only extern and packed structs have defined in-memory layout. comptime assert(@typeInfo(T).@"struct".layout != .auto); return @ptrCast(try r.takeArray(@sizeOf(T))); @@ -1122,9 +1122,9 @@ pub fn takeStructReference(r: *Reader, comptime T: type) Error!*align(1) T { /// Asserts the buffer was initialized with a capacity at least `@sizeOf(T)`. 
/// /// See also: -/// * `takeStructReference` +/// * `takeStructPointer` /// * `peekStruct` -pub fn peekStructReference(r: *Reader, comptime T: type) Error!*align(1) T { +pub fn peekStructPointer(r: *Reader, comptime T: type) Error!*align(1) T { // Only extern and packed structs have defined in-memory layout. comptime assert(@typeInfo(T).@"struct".layout != .auto); return @ptrCast(try r.peekArray(@sizeOf(T))); @@ -1136,19 +1136,19 @@ pub fn peekStructReference(r: *Reader, comptime T: type) Error!*align(1) T { /// when `endian` is comptime-known and matches the host endianness. /// /// See also: -/// * `takeStructReference` +/// * `takeStructPointer` /// * `peekStruct` pub inline fn takeStruct(r: *Reader, comptime T: type, endian: std.builtin.Endian) Error!T { switch (@typeInfo(T)) { .@"struct" => |info| switch (info.layout) { .auto => @compileError("ill-defined memory layout"), .@"extern" => { - var res = (try r.takeStructReference(T)).*; + var res = (try r.takeStructPointer(T)).*; if (native_endian != endian) std.mem.byteSwapAllFields(T, &res); return res; }, .@"packed" => { - return takeInt(r, info.backing_integer.?, endian); + return @bitCast(try takeInt(r, info.backing_integer.?, endian)); }, }, else => @compileError("not a struct"), @@ -1162,18 +1162,18 @@ pub inline fn takeStruct(r: *Reader, comptime T: type, endian: std.builtin.Endia /// /// See also: /// * `takeStruct` -/// * `peekStructReference` +/// * `peekStructPointer` pub inline fn peekStruct(r: *Reader, comptime T: type, endian: std.builtin.Endian) Error!T { switch (@typeInfo(T)) { .@"struct" => |info| switch (info.layout) { .auto => @compileError("ill-defined memory layout"), .@"extern" => { - var res = (try r.peekStructReference(T)).*; + var res = (try r.peekStructPointer(T)).*; if (native_endian != endian) std.mem.byteSwapAllFields(T, &res); return res; }, .@"packed" => { - return peekInt(r, info.backing_integer.?, endian); + return @bitCast(try peekInt(r, info.backing_integer.?, endian)); }, }, 
else => @compileError("not a struct"), @@ -1557,27 +1557,27 @@ test takeVarInt { try testing.expectError(error.EndOfStream, r.takeVarInt(u16, .little, 1)); } -test takeStructReference { +test takeStructPointer { var r: Reader = .fixed(&.{ 0x12, 0x00, 0x34, 0x56 }); const S = extern struct { a: u8, b: u16 }; switch (native_endian) { - .little => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.takeStructReference(S)).*), - .big => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.takeStructReference(S)).*), + .little => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.takeStructPointer(S)).*), + .big => try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.takeStructPointer(S)).*), } - try testing.expectError(error.EndOfStream, r.takeStructReference(S)); + try testing.expectError(error.EndOfStream, r.takeStructPointer(S)); } -test peekStructReference { +test peekStructPointer { var r: Reader = .fixed(&.{ 0x12, 0x00, 0x34, 0x56 }); const S = extern struct { a: u8, b: u16 }; switch (native_endian) { .little => { - try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructReference(S)).*); - try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructReference(S)).*); + try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructPointer(S)).*); + try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x5634 }), (try r.peekStructPointer(S)).*); }, .big => { - try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructReference(S)).*); - try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructReference(S)).*); + try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructPointer(S)).*); + try testing.expectEqual(@as(S, .{ .a = 0x12, .b = 0x3456 }), (try r.peekStructPointer(S)).*); }, } } @@ -1724,6 +1724,27 @@ test "takeDelimiterInclusive when it rebases" { } } +test "takeStruct and peekStruct 
packed" { + var r: Reader = .fixed(&.{ 0b11110000, 0b00110011 }); + const S = packed struct(u16) { a: u2, b: u6, c: u7, d: u1 }; + + try testing.expectEqual(@as(S, .{ + .a = 0b11, + .b = 0b001100, + .c = 0b1110000, + .d = 0b1, + }), try r.peekStruct(S, .big)); + + try testing.expectEqual(@as(S, .{ + .a = 0b11, + .b = 0b001100, + .c = 0b1110000, + .d = 0b1, + }), try r.takeStruct(S, .big)); + + try testing.expectError(error.EndOfStream, r.takeStruct(S, .little)); +} + /// Provides a `Reader` implementation by passing data from an underlying /// reader through `Hasher.update`. /// diff --git a/lib/std/Io/Writer.zig b/lib/std/Io/Writer.zig index 11bc05a00d..54d113ed91 100644 --- a/lib/std/Io/Writer.zig +++ b/lib/std/Io/Writer.zig @@ -867,18 +867,11 @@ pub inline fn writeSliceEndian( } } -/// Asserts that the buffer storage capacity is at least enough to store `@sizeOf(Elem)` -/// -/// Asserts that the buffer is aligned enough for `@alignOf(Elem)`. pub fn writeSliceSwap(w: *Writer, Elem: type, slice: []const Elem) Error!void { - var i: usize = 0; - while (i < slice.len) { - const dest_bytes = try w.writableSliceGreedy(@sizeOf(Elem)); - const dest: []Elem = @alignCast(@ptrCast(dest_bytes[0 .. 
dest_bytes.len - dest_bytes.len % @sizeOf(Elem)])); - const copy_len = @min(dest.len, slice.len - i); - @memcpy(dest[0..copy_len], slice[i..][0..copy_len]); - i += copy_len; - std.mem.byteSwapAllElements(Elem, dest); + for (slice) |elem| { + var tmp = elem; + std.mem.byteSwapAllFields(Elem, &tmp); + try w.writeAll(@ptrCast(&tmp)); } } @@ -1141,8 +1134,8 @@ pub fn printValue( else => invalidFmtError(fmt, value), }, 't' => switch (@typeInfo(T)) { - .error_set => return w.writeAll(@errorName(value)), - .@"enum", .@"union" => return w.writeAll(@tagName(value)), + .error_set => return w.alignBufferOptions(@errorName(value), options), + .@"enum", .@"union" => return w.alignBufferOptions(@tagName(value), options), else => invalidFmtError(fmt, value), }, else => {}, @@ -2152,6 +2145,14 @@ test "bytes.hex" { try testing.expectFmt("lowercase: 000ebabe\n", "lowercase: {x}\n", .{bytes_with_zeros}); } +test "padding" { + const foo: enum { foo } = .foo; + try testing.expectFmt("tag: |foo |\n", "tag: |{t:<4}|\n", .{foo}); + + const bar: error{bar} = error.bar; + try testing.expectFmt("error: |bar |\n", "error: |{t:<4}|\n", .{bar}); +} + test fixed { { var buf: [255]u8 = undefined; @@ -2650,9 +2651,10 @@ test writeStruct { } test writeSliceEndian { - var buffer: [4]u8 align(2) = undefined; + var buffer: [5]u8 align(2) = undefined; var w: Writer = .fixed(&buffer); + try w.writeByte('x'); const array: [2]u16 = .{ 0x1234, 0x5678 }; try writeSliceEndian(&w, u16, &array, .big); - try testing.expectEqualSlices(u8, &.{ 0x12, 0x34, 0x56, 0x78 }, &buffer); + try testing.expectEqualSlices(u8, &.{ 'x', 0x12, 0x34, 0x56, 0x78 }, &buffer); } diff --git a/lib/std/hash.zig b/lib/std/hash.zig index 27107e1ddf..77a25550f3 100644 --- a/lib/std/hash.zig +++ b/lib/std/hash.zig @@ -31,8 +31,6 @@ pub const CityHash64 = cityhash.CityHash64; const wyhash = @import("hash/wyhash.zig"); pub const Wyhash = wyhash.Wyhash; -pub const RapidHash = @import("hash/RapidHash.zig"); - const xxhash = 
@import("hash/xxhash.zig"); pub const XxHash3 = xxhash.XxHash3; pub const XxHash64 = xxhash.XxHash64; diff --git a/lib/std/hash/RapidHash.zig b/lib/std/hash/RapidHash.zig deleted file mode 100644 index 030c570df9..0000000000 --- a/lib/std/hash/RapidHash.zig +++ /dev/null @@ -1,125 +0,0 @@ -const std = @import("std"); - -const readInt = std.mem.readInt; -const assert = std.debug.assert; -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; - -const RAPID_SEED: u64 = 0xbdd89aa982704029; -const RAPID_SECRET: [3]u64 = .{ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3 }; - -pub fn hash(seed: u64, input: []const u8) u64 { - const sc = RAPID_SECRET; - const len = input.len; - var a: u64 = 0; - var b: u64 = 0; - var k = input; - var is: [3]u64 = .{ seed, 0, 0 }; - - is[0] ^= mix(seed ^ sc[0], sc[1]) ^ len; - - if (len <= 16) { - if (len >= 4) { - const d: u64 = ((len & 24) >> @intCast(len >> 3)); - const e = len - 4; - a = (r32(k) << 32) | r32(k[e..]); - b = ((r32(k[d..]) << 32) | r32(k[(e - d)..])); - } else if (len > 0) - a = (@as(u64, k[0]) << 56) | (@as(u64, k[len >> 1]) << 32) | @as(u64, k[len - 1]); - } else { - var remain = len; - if (len > 48) { - is[1] = is[0]; - is[2] = is[0]; - while (remain >= 96) { - inline for (0..6) |i| { - const m1 = r64(k[8 * i * 2 ..]); - const m2 = r64(k[8 * (i * 2 + 1) ..]); - is[i % 3] = mix(m1 ^ sc[i % 3], m2 ^ is[i % 3]); - } - k = k[96..]; - remain -= 96; - } - if (remain >= 48) { - inline for (0..3) |i| { - const m1 = r64(k[8 * i * 2 ..]); - const m2 = r64(k[8 * (i * 2 + 1) ..]); - is[i] = mix(m1 ^ sc[i], m2 ^ is[i]); - } - k = k[48..]; - remain -= 48; - } - - is[0] ^= is[1] ^ is[2]; - } - - if (remain > 16) { - is[0] = mix(r64(k) ^ sc[2], r64(k[8..]) ^ is[0] ^ sc[1]); - if (remain > 32) { - is[0] = mix(r64(k[16..]) ^ sc[2], r64(k[24..]) ^ is[0]); - } - } - - a = r64(input[len - 16 ..]); - b = r64(input[len - 8 ..]); - } - - a ^= sc[1]; - b ^= is[0]; - mum(&a, &b); - return mix(a ^ sc[0] ^ 
len, b ^ sc[1]); -} - -test "RapidHash.hash" { - const bytes: []const u8 = "abcdefgh" ** 128; - - const sizes: [13]u64 = .{ 0, 1, 2, 3, 4, 8, 16, 32, 64, 128, 256, 512, 1024 }; - - const outcomes: [13]u64 = .{ - 0x5a6ef77074ebc84b, - 0xc11328477bc0f5d1, - 0x5644ac035e40d569, - 0x347080fbf5fcd81, - 0x56b66b8dc802bcc, - 0xb6bf9055973aac7c, - 0xed56d62eead1e402, - 0xc19072d767da8ffb, - 0x89bb40a9928a4f0d, - 0xe0af7c5e7b6e29fd, - 0x9a3ed35fbedfa11a, - 0x4c684b2119ca19fb, - 0x4b575f5bf25600d6, - }; - - var success: bool = true; - for (sizes, outcomes) |s, e| { - const r = hash(RAPID_SEED, bytes[0..s]); - - expectEqual(e, r) catch |err| { - std.debug.print("Failed on {d}: {!}\n", .{ s, err }); - success = false; - }; - } - try expect(success); -} - -inline fn mum(a: *u64, b: *u64) void { - const r = @as(u128, a.*) * b.*; - a.* = @truncate(r); - b.* = @truncate(r >> 64); -} - -inline fn mix(a: u64, b: u64) u64 { - var copy_a = a; - var copy_b = b; - mum(&copy_a, &copy_b); - return copy_a ^ copy_b; -} - -inline fn r64(p: []const u8) u64 { - return readInt(u64, p[0..8], .little); -} - -inline fn r32(p: []const u8) u64 { - return readInt(u32, p[0..4], .little); -} diff --git a/lib/std/hash/benchmark.zig b/lib/std/hash/benchmark.zig index b899c9d429..33cb3432bf 100644 --- a/lib/std/hash/benchmark.zig +++ b/lib/std/hash/benchmark.zig @@ -59,12 +59,6 @@ const hashes = [_]Hash{ .ty = hash.crc.Crc32, .name = "crc32", }, - Hash{ - .ty = hash.RapidHash, - .name = "rapidhash", - .has_iterative_api = false, - .init_u64 = 0, - }, Hash{ .ty = hash.CityHash32, .name = "cityhash-32", diff --git a/lib/std/json.zig b/lib/std/json.zig index 246c98817e..f81ac1cd65 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -44,7 +44,7 @@ test Value { test Stringify { var out: std.io.Writer.Allocating = .init(testing.allocator); var write_stream: Stringify = .{ - .writer = &out.interface, + .writer = &out.writer, .options = .{ .whitespace = .indent_2 }, }; defer out.deinit(); @@ -66,18 +66,18 @@ pub 
const Value = @import("json/dynamic.zig").Value; pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap; -pub const validate = @import("json/scanner.zig").validate; -pub const Error = @import("json/scanner.zig").Error; -pub const reader = @import("json/scanner.zig").reader; -pub const default_buffer_size = @import("json/scanner.zig").default_buffer_size; -pub const Token = @import("json/scanner.zig").Token; -pub const TokenType = @import("json/scanner.zig").TokenType; -pub const Diagnostics = @import("json/scanner.zig").Diagnostics; -pub const AllocWhen = @import("json/scanner.zig").AllocWhen; -pub const default_max_value_len = @import("json/scanner.zig").default_max_value_len; -pub const Reader = @import("json/scanner.zig").Reader; -pub const Scanner = @import("json/scanner.zig").Scanner; -pub const isNumberFormattedLikeAnInteger = @import("json/scanner.zig").isNumberFormattedLikeAnInteger; +pub const Scanner = @import("json/Scanner.zig"); +pub const validate = Scanner.validate; +pub const Error = Scanner.Error; +pub const reader = Scanner.reader; +pub const default_buffer_size = Scanner.default_buffer_size; +pub const Token = Scanner.Token; +pub const TokenType = Scanner.TokenType; +pub const Diagnostics = Scanner.Diagnostics; +pub const AllocWhen = Scanner.AllocWhen; +pub const default_max_value_len = Scanner.default_max_value_len; +pub const Reader = Scanner.Reader; +pub const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger; pub const ParseOptions = @import("json/static.zig").ParseOptions; pub const Parsed = @import("json/static.zig").Parsed; @@ -101,10 +101,10 @@ pub fn fmt(value: anytype, options: Stringify.Options) Formatter(@TypeOf(value)) test fmt { const expectFmt = std.testing.expectFmt; - try expectFmt("123", "{}", .{fmt(@as(u32, 123), .{})}); + try expectFmt("123", "{f}", .{fmt(@as(u32, 123), .{})}); try expectFmt( \\{"num":927,"msg":"hello","sub":{"mybool":true}} - , "{}", .{fmt(struct { + , "{f}", .{fmt(struct { 
num: u32, msg: []const u8, sub: struct { @@ -123,14 +123,7 @@ pub fn Formatter(comptime T: type) type { value: T, options: Stringify.Options, - pub fn format( - self: @This(), - comptime fmt_spec: []const u8, - options: std.fmt.FormatOptions, - writer: *std.io.Writer, - ) !void { - comptime std.debug.assert(fmt_spec.len == 0); - _ = options; + pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void { try Stringify.value(self.value, self.options, writer); } }; @@ -138,7 +131,7 @@ pub fn Formatter(comptime T: type) type { test { _ = @import("json/test.zig"); - _ = @import("json/scanner.zig"); + _ = Scanner; _ = @import("json/dynamic.zig"); _ = @import("json/hashmap.zig"); _ = @import("json/static.zig"); diff --git a/lib/std/json/Scanner.zig b/lib/std/json/Scanner.zig new file mode 100644 index 0000000000..b9c3c506a5 --- /dev/null +++ b/lib/std/json/Scanner.zig @@ -0,0 +1,1767 @@ +//! The lowest level parsing API in this package; +//! supports streaming input with a low memory footprint. +//! The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input. +//! Specifically `d/8` bytes are required for this purpose, +//! with some extra buffer according to the implementation of `std.ArrayList`. +//! +//! This scanner can emit partial tokens; see `std.json.Token`. +//! The input to this class is a sequence of input buffers that you must supply one at a time. +//! Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned. +//! Then call `feedInput()` again and so forth. +//! Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`, +//! or when `error.BufferUnderrun` requests more data and there is no more. +//! Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned. +//! +//! 
Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259 +//! * RFC 8259 requires JSON documents be valid UTF-8, +//! but makes an allowance for systems that are "part of a closed ecosystem". +//! I have no idea what that's supposed to mean in the context of a standard specification. +//! This implementation requires inputs to be valid UTF-8. +//! * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits, +//! but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed. +//! (RFC 5234 defines HEXDIG to only allow uppercase.) +//! * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value". +//! See http://www.unicode.org/glossary/#unicode_scalar_value . +//! * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences, +//! but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?), +//! which would mean that unpaired surrogate halves are forbidden. +//! By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to) +//! explicitly allows unpaired surrogate halves. +//! This implementation forbids unpaired surrogate halves in \u sequences. +//! If a high surrogate half appears in a \u sequence, +//! then a low surrogate half must immediately follow in \u notation. +//! * RFC 8259 allows implementations to "accept non-JSON forms or extensions". +//! This implementation does not accept any of that. +//! * RFC 8259 allows implementations to put limits on "the size of texts", +//! "the maximum depth of nesting", "the range and precision of numbers", +//! and "the length and character contents of strings". +//! This low-level implementation does not limit these, +//! except where noted above, and except that nesting depth requires memory allocation. +//! Note that this low-level API does not interpret numbers numerically, +//! 
but simply emits their source form for some higher level code to make sense of. +//! * This low-level implementation allows duplicate object keys, +//! and key/value pairs are emitted in the order they appear in the input. + +const Scanner = @This(); +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const assert = std.debug.assert; +const BitStack = std.BitStack; + +state: State = .value, +string_is_object_key: bool = false, +stack: BitStack, +value_start: usize = undefined, +utf16_code_units: [2]u16 = undefined, + +input: []const u8 = "", +cursor: usize = 0, +is_end_of_input: bool = false, +diagnostics: ?*Diagnostics = null, + +/// The allocator is only used to track `[]` and `{}` nesting levels. +pub fn initStreaming(allocator: Allocator) @This() { + return .{ + .stack = BitStack.init(allocator), + }; +} +/// Use this if your input is a single slice. +/// This is effectively equivalent to: +/// ``` +/// initStreaming(allocator); +/// feedInput(complete_input); +/// endInput(); +/// ``` +pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() { + return .{ + .stack = BitStack.init(allocator), + .input = complete_input, + .is_end_of_input = true, + }; +} +pub fn deinit(self: *@This()) void { + self.stack.deinit(); + self.* = undefined; +} + +pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { + diagnostics.cursor_pointer = &self.cursor; + self.diagnostics = diagnostics; +} + +/// Call this whenever you get `error.BufferUnderrun` from `next()`. +/// When there is no more input to provide, call `endInput()`. +pub fn feedInput(self: *@This(), input: []const u8) void { + assert(self.cursor == self.input.len); // Not done with the last input slice. + if (self.diagnostics) |diag| { + diag.total_bytes_before_current_input += self.input.len; + // This usually goes "negative" to measure how far before the beginning + // of the new buffer the current line started. 
+ diag.line_start_cursor -%= self.cursor; + } + self.input = input; + self.cursor = 0; + self.value_start = 0; +} +/// Call this when you will no longer call `feedInput()` anymore. +/// This can be called either immediately after the last `feedInput()`, +/// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`. +/// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`. +pub fn endInput(self: *@This()) void { + self.is_end_of_input = true; +} + +pub const NextError = Error || Allocator.Error || error{BufferUnderrun}; +pub const AllocError = Error || Allocator.Error || error{ValueTooLong}; +pub const PeekError = Error || error{BufferUnderrun}; +pub const SkipError = Error || Allocator.Error; +pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun}; + +/// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` +/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. +/// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. +pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token { + return self.nextAllocMax(allocator, when, default_max_value_len); +} + +/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. +/// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. +pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token { + assert(self.is_end_of_input); // This function is not available in streaming mode. 
+ const token_type = self.peekNextTokenType() catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }; + switch (token_type) { + .number, .string => { + var value_list = ArrayList(u8).init(allocator); + errdefer { + value_list.deinit(); + } + if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }) |slice| { + return if (token_type == .number) + Token{ .number = slice } + else + Token{ .string = slice }; + } else { + return if (token_type == .number) + Token{ .allocated_number = try value_list.toOwnedSlice() } + else + Token{ .allocated_string = try value_list.toOwnedSlice() }; + } + }, + + // Simple tokens never alloc. + .object_begin, + .object_end, + .array_begin, + .array_end, + .true, + .false, + .null, + .end_of_document, + => return self.next() catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }, + } +} + +/// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` +pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 { + return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); +} +/// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`. +/// When allocation is not necessary with `.alloc_if_needed`, +/// this method returns the content slice from the input buffer, and `value_list` is not touched. +/// When allocation is necessary or with `.alloc_always`, this method concatenates partial tokens into the given `value_list`, +/// and returns `null` once the final `.number` or `.string` token has been written into it. +/// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list. 
+/// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation +/// can be resumed by passing the same array list in again. +/// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type; +/// the caller of this method is expected to know which type of token is being processed. +pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 { + while (true) { + const token = try self.next(); + switch (token) { + // Accumulate partial values. + .partial_number, .partial_string => |slice| { + try appendSlice(value_list, slice, max_value_len); + }, + .partial_string_escaped_1 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + .partial_string_escaped_2 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + .partial_string_escaped_3 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + .partial_string_escaped_4 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + + // Return complete values. + .number => |slice| { + if (when == .alloc_if_needed and value_list.items.len == 0) { + // No alloc necessary. + return slice; + } + try appendSlice(value_list, slice, max_value_len); + // The token is complete. + return null; + }, + .string => |slice| { + if (when == .alloc_if_needed and value_list.items.len == 0) { + // No alloc necessary. + return slice; + } + try appendSlice(value_list, slice, max_value_len); + // The token is complete. + return null; + }, + + .object_begin, + .object_end, + .array_begin, + .array_end, + .true, + .false, + .null, + .end_of_document, + => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this. 
+ + .allocated_number, .allocated_string => unreachable, + } + } +} + +/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. +/// If the next token type is `.object_begin` or `.array_begin`, +/// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found. +/// If the next token type is `.number` or `.string`, +/// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found. +/// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once. +/// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`; +/// see `peekNextTokenType()`. +pub fn skipValue(self: *@This()) SkipError!void { + assert(self.is_end_of_input); // This function is not available in streaming mode. + switch (self.peekNextTokenType() catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }) { + .object_begin, .array_begin => { + self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }; + }, + .number, .string => { + while (true) { + switch (self.next() catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }) { + .partial_number, + .partial_string, + .partial_string_escaped_1, + .partial_string_escaped_2, + .partial_string_escaped_3, + .partial_string_escaped_4, + => continue, + + .number, .string => break, + + else => unreachable, + } + } + }, + .true, .false, .null => { + _ = self.next() catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }; + }, + + .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. + } +} + +/// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal the given stack height. 
/// Unlike `skipValue()`, this function is available in streaming mode.
pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void {
    while (true) {
        switch (try self.next()) {
            .object_end, .array_end => {
                if (self.stackHeight() == terminal_stack_height) break;
            },
            // Callers must pass a height that is still on the stack, so the
            // document cannot end before we unwind to it.
            .end_of_document => unreachable,
            else => continue,
        }
    }
}

/// The depth of `{}` or `[]` nesting levels at the current position.
pub fn stackHeight(self: *const @This()) usize {
    return self.stack.bit_len;
}

/// Preallocate memory to hold the given number of nesting levels.
/// `stackHeight()` up to the given number will not cause allocations.
pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void {
    try self.stack.ensureTotalCapacity(height);
}

/// See `std.json.Token` for documentation of this function.
pub fn next(self: *@This()) NextError!Token {
    // This is a resumable state machine: every `return` (including via
    // error.BufferUnderrun) leaves `self.state` positioned so that calling
    // next() again picks up exactly where scanning left off.
    state_loop: while (true) {
        switch (self.state) {
            .value => {
                switch (try self.skipWhitespaceExpectByte()) {
                    // Object, Array
                    '{' => {
                        try self.stack.push(OBJECT_MODE);
                        self.cursor += 1;
                        self.state = .object_start;
                        return .object_begin;
                    },
                    '[' => {
                        try self.stack.push(ARRAY_MODE);
                        self.cursor += 1;
                        self.state = .array_start;
                        return .array_begin;
                    },

                    // String
                    '"' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        continue :state_loop;
                    },

                    // Number
                    '1'...'9' => {
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .number_int;
                        continue :state_loop;
                    },
                    '0' => {
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .number_leading_zero;
                        continue :state_loop;
                    },
                    '-' => {
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .number_minus;
                        continue :state_loop;
                    },

                    // literal values
                    't' => {
                        self.cursor += 1;
                        self.state = .literal_t;
                        continue :state_loop;
                    },
                    'f' => {
                        self.cursor += 1;
                        self.state = .literal_f;
                        continue :state_loop;
                    },
                    'n' => {
                        self.cursor += 1;
                        self.state = .literal_n;
                        continue :state_loop;
                    },

                    else => return error.SyntaxError,
                }
            },

            .post_value => {
                if (try self.skipWhitespaceCheckEnd()) return .end_of_document;

                const c = self.input[self.cursor];
                if (self.string_is_object_key) {
                    self.string_is_object_key = false;
                    switch (c) {
                        ':' => {
                            self.cursor += 1;
                            self.state = .value;
                            continue :state_loop;
                        },
                        else => return error.SyntaxError,
                    }
                }

                switch (c) {
                    '}' => {
                        if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError;
                        self.cursor += 1;
                        // stay in .post_value state.
                        return .object_end;
                    },
                    ']' => {
                        if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError;
                        self.cursor += 1;
                        // stay in .post_value state.
                        return .array_end;
                    },
                    ',' => {
                        switch (self.stack.peek()) {
                            OBJECT_MODE => {
                                self.state = .object_post_comma;
                            },
                            ARRAY_MODE => {
                                self.state = .value;
                            },
                        }
                        self.cursor += 1;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },

            .object_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        self.string_is_object_key = true;
                        continue :state_loop;
                    },
                    '}' => {
                        self.cursor += 1;
                        _ = self.stack.pop();
                        self.state = .post_value;
                        return .object_end;
                    },
                    else => return error.SyntaxError,
                }
            },
            .object_post_comma => {
                // After a comma in an object, only a key string is allowed (no trailing comma).
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        self.string_is_object_key = true;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },

            .array_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    ']' => {
                        self.cursor += 1;
                        _ = self.stack.pop();
                        self.state = .post_value;
                        return .array_end;
                    },
                    else => {
                        self.state = .value;
                        continue :state_loop;
                    },
                }
            },

            .number_minus => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0' => {
                        self.cursor += 1;
                        self.state = .number_leading_zero;
                        continue :state_loop;
                    },
                    '1'...'9' => {
                        self.cursor += 1;
                        self.state = .number_int;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .number_leading_zero => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true);
                switch (self.input[self.cursor]) {
                    '.' => {
                        self.cursor += 1;
                        self.state = .number_post_dot;
                        continue :state_loop;
                    },
                    'e', 'E' => {
                        self.cursor += 1;
                        self.state = .number_post_e;
                        continue :state_loop;
                    },
                    else => {
                        // A lone "0" (or "-0") is complete; "01" would be caught
                        // by .post_value as a syntax error on the next byte.
                        self.state = .post_value;
                        return Token{ .number = self.takeValueSlice() };
                    },
                }
            },
            .number_int => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        '0'...'9' => continue,
                        '.' => {
                            self.cursor += 1;
                            self.state = .number_post_dot;
                            continue :state_loop;
                        },
                        'e', 'E' => {
                            self.cursor += 1;
                            self.state = .number_post_e;
                            continue :state_loop;
                        },
                        else => {
                            self.state = .post_value;
                            return Token{ .number = self.takeValueSlice() };
                        },
                    }
                }
                return self.endOfBufferInNumber(true);
            },
            .number_post_dot => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.state = .number_frac;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .number_frac => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        '0'...'9' => continue,
                        'e', 'E' => {
                            self.cursor += 1;
                            self.state = .number_post_e;
                            continue :state_loop;
                        },
                        else => {
                            self.state = .post_value;
                            return Token{ .number = self.takeValueSlice() };
                        },
                    }
                }
                return self.endOfBufferInNumber(true);
            },
            .number_post_e => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.state = .number_exp;
                        continue :state_loop;
                    },
                    '+', '-' => {
                        self.cursor += 1;
                        self.state = .number_post_e_sign;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .number_post_e_sign => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.state = .number_exp;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .number_exp => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        '0'...'9' => continue,
                        else => {
                            self.state = .post_value;
                            return Token{ .number = self.takeValueSlice() };
                        },
                    }
                }
                return self.endOfBufferInNumber(true);
            },

            .string => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.

                        // ASCII plain text.
                        0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,

                        // Special characters.
                        '"' => {
                            const result = Token{ .string = self.takeValueSlice() };
                            self.cursor += 1;
                            self.state = .post_value;
                            return result;
                        },
                        '\\' => {
                            const slice = self.takeValueSlice();
                            self.cursor += 1;
                            self.state = .string_backslash;
                            if (slice.len > 0) return Token{ .partial_string = slice };
                            continue :state_loop;
                        },

                        // UTF-8 validation.
                        // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
                        0xC2...0xDF => {
                            self.cursor += 1;
                            self.state = .string_utf8_last_byte;
                            continue :state_loop;
                        },
                        0xE0 => {
                            self.cursor += 1;
                            self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
                            continue :state_loop;
                        },
                        0xE1...0xEC, 0xEE...0xEF => {
                            self.cursor += 1;
                            self.state = .string_utf8_second_to_last_byte;
                            continue :state_loop;
                        },
                        0xED => {
                            self.cursor += 1;
                            self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
                            continue :state_loop;
                        },
                        0xF0 => {
                            self.cursor += 1;
                            self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
                            continue :state_loop;
                        },
                        0xF1...0xF3 => {
                            self.cursor += 1;
                            self.state = .string_utf8_third_to_last_byte;
                            continue :state_loop;
                        },
                        0xF4 => {
                            self.cursor += 1;
                            self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
                            continue :state_loop;
                        },
                        0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
                    }
                }
                if (self.is_end_of_input) return error.UnexpectedEndOfInput;
                const slice = self.takeValueSlice();
                if (slice.len > 0) return Token{ .partial_string = slice };
                return error.BufferUnderrun;
            },
            .string_backslash => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    '"', '\\', '/' => {
                        // Since these characters now represent themselves literally,
                        // we can simply begin the next plaintext slice here.
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .string;
                        continue :state_loop;
                    },
                    'b' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
                    },
                    'f' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
                    },
                    'n' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
                    },
                    'r' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
                    },
                    't' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
                    },
                    'u' => {
                        self.cursor += 1;
                        self.state = .string_backslash_u;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .string_backslash_u => {
                // First hex digit of \uXXXX: the high nibble of the code unit.
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[0] = @as(u16, c - '0') << 12;
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.state = .string_backslash_u_1;
                continue :state_loop;
            },
            .string_backslash_u_1 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[0] |= @as(u16, c - '0') << 8;
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.state = .string_backslash_u_2;
                continue :state_loop;
            },
            .string_backslash_u_2 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[0] |= @as(u16, c - '0') << 4;
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.state = .string_backslash_u_3;
                continue :state_loop;
            },
            .string_backslash_u_3 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[0] |= c - '0';
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] |= c - 'A' + 10;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] |= c - 'a' + 10;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) {
                    // A high surrogate must be followed by \uDC00-\uDFFF.
                    self.state = .string_surrogate_half;
                    continue :state_loop;
                } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) {
                    return error.SyntaxError; // Unexpected low surrogate half.
                } else {
                    self.value_start = self.cursor;
                    self.state = .string;
                    return partialStringCodepoint(self.utf16_code_units[0]);
                }
            },
            .string_surrogate_half => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    '\\' => {
                        self.cursor += 1;
                        self.state = .string_surrogate_half_backslash;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    'u' => {
                        self.cursor += 1;
                        self.state = .string_surrogate_half_backslash_u;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash_u => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    'D', 'd' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] = 0xD << 12;
                        self.state = .string_surrogate_half_backslash_u_1;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash_u_1 => {
                // Second digit must be C-F so the code unit lands in 0xDC00-0xDFFF.
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    'C'...'F' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8;
                        self.state = .string_surrogate_half_backslash_u_2;
                        continue :state_loop;
                    },
                    'c'...'f' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8;
                        self.state = .string_surrogate_half_backslash_u_2;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash_u_2 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - '0') << 4;
                        self.state = .string_surrogate_half_backslash_u_3;
                        continue :state_loop;
                    },
                    'A'...'F' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4;
                        self.state = .string_surrogate_half_backslash_u_3;
                        continue :state_loop;
                    },
                    'a'...'f' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4;
                        self.state = .string_surrogate_half_backslash_u_3;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .string_surrogate_half_backslash_u_3 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[1] |= c - '0';
                    },
                    'A'...'F' => {
                        self.utf16_code_units[1] |= c - 'A' + 10;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[1] |= c - 'a' + 10;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.value_start = self.cursor;
                self.state = .string;
                // Both halves were validated above, so decoding cannot fail.
                const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable;
                return partialStringCodepoint(code_point);
            },

            .string_utf8_last_byte => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0xBF => {
                        self.cursor += 1;
                        self.state = .string;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_second_to_last_byte => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_second_to_last_byte_guard_against_overlong => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0xA0...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_second_to_last_byte_guard_against_surrogate_half => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0x9F => {
                        self.cursor += 1;
                        self.state = .string_utf8_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_third_to_last_byte => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_second_to_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_third_to_last_byte_guard_against_overlong => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x90...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_second_to_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_third_to_last_byte_guard_against_too_large => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0x8F => {
                        self.cursor += 1;
                        self.state = .string_utf8_second_to_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },

            .literal_t => {
                switch (try self.expectByte()) {
                    'r' => {
                        self.cursor += 1;
                        self.state = .literal_tr;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_tr => {
                switch (try self.expectByte()) {
                    'u' => {
                        self.cursor += 1;
                        self.state = .literal_tru;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_tru => {
                switch (try self.expectByte()) {
                    'e' => {
                        self.cursor += 1;
                        self.state = .post_value;
                        return .true;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_f => {
                switch (try self.expectByte()) {
                    'a' => {
                        self.cursor += 1;
                        self.state = .literal_fa;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_fa => {
                switch (try self.expectByte()) {
                    'l' => {
                        self.cursor += 1;
                        self.state = .literal_fal;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_fal => {
                switch (try self.expectByte()) {
                    's' => {
                        self.cursor += 1;
                        self.state = .literal_fals;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_fals => {
                switch (try self.expectByte()) {
                    'e' => {
                        self.cursor += 1;
                        self.state = .post_value;
                        return .false;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_n => {
                switch (try self.expectByte()) {
                    'u' => {
                        self.cursor += 1;
                        self.state = .literal_nu;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_nu => {
                switch (try self.expectByte()) {
                    'l' => {
                        self.cursor += 1;
                        self.state = .literal_nul;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_nul => {
                switch (try self.expectByte()) {
                    'l' => {
                        self.cursor += 1;
                        self.state = .post_value;
                        return .null;
                    },
                    else => return error.SyntaxError,
                }
            },
        }
        unreachable;
    }
}

/// Seeks ahead in the input until the first byte of the next token (or the end of the input)
/// determines which type of token will be returned from the next `next*()` call.
/// This function is idempotent, only advancing past commas, colons, and inter-token whitespace.
pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
    state_loop: while (true) {
        switch (self.state) {
            .value => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '{' => return .object_begin,
                    '[' => return .array_begin,
                    '"' => return .string,
                    '-', '0'...'9' => return .number,
                    't' => return .true,
                    'f' => return .false,
                    'n' => return .null,
                    else => return error.SyntaxError,
                }
            },

            .post_value => {
                if (try self.skipWhitespaceCheckEnd()) return .end_of_document;

                const c = self.input[self.cursor];
                if (self.string_is_object_key) {
                    self.string_is_object_key = false;
                    switch (c) {
                        ':' => {
                            self.cursor += 1;
                            self.state = .value;
                            continue :state_loop;
                        },
                        else => return error.SyntaxError,
                    }
                }

                switch (c) {
                    '}' => return .object_end,
                    ']' => return .array_end,
                    ',' => {
                        switch (self.stack.peek()) {
                            OBJECT_MODE => {
                                self.state = .object_post_comma;
                            },
                            ARRAY_MODE => {
                                self.state = .value;
                            },
                        }
                        self.cursor += 1;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },

            .object_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => return .string,
                    '}' => return .object_end,
                    else => return error.SyntaxError,
                }
            },
            .object_post_comma => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => return .string,
                    else => return error.SyntaxError,
                }
            },

            .array_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    ']' => return .array_end,
                    else => {
                        self.state = .value;
                        continue :state_loop;
                    },
                }
            },

            // Mid-value states: the token type is already determined.
            .number_minus,
            .number_leading_zero,
            .number_int,
            .number_post_dot,
            .number_frac,
            .number_post_e,
            .number_post_e_sign,
            .number_exp,
            => return .number,

            .string,
            .string_backslash,
            .string_backslash_u,
            .string_backslash_u_1,
            .string_backslash_u_2,
            .string_backslash_u_3,
            .string_surrogate_half,
            .string_surrogate_half_backslash,
            .string_surrogate_half_backslash_u,
            .string_surrogate_half_backslash_u_1,
            .string_surrogate_half_backslash_u_2,
            .string_surrogate_half_backslash_u_3,
            => return .string,

            .string_utf8_last_byte,
            .string_utf8_second_to_last_byte,
            .string_utf8_second_to_last_byte_guard_against_overlong,
            .string_utf8_second_to_last_byte_guard_against_surrogate_half,
            .string_utf8_third_to_last_byte,
            .string_utf8_third_to_last_byte_guard_against_overlong,
            .string_utf8_third_to_last_byte_guard_against_too_large,
            => return .string,

            .literal_t,
            .literal_tr,
            .literal_tru,
            => return .true,
            .literal_f,
            .literal_fa,
            .literal_fal,
            .literal_fals,
            => return .false,
            .literal_n,
            .literal_nu,
            .literal_nul,
            => return .null,
        }
        unreachable;
    }
}

/// Resumable scanner state. Each variant records exactly how far into a token
/// the scanner got when the input buffer ran out.
const State = enum {
    value,
    post_value,

    object_start,
    object_post_comma,

    array_start,

    number_minus,
    number_leading_zero,
    number_int,
    number_post_dot,
    number_frac,
    number_post_e,
    number_post_e_sign,
    number_exp,

    string,
    string_backslash,
    string_backslash_u,
    string_backslash_u_1,
    string_backslash_u_2,
    string_backslash_u_3,
    string_surrogate_half,
    string_surrogate_half_backslash,
    string_surrogate_half_backslash_u,
    string_surrogate_half_backslash_u_1,
    string_surrogate_half_backslash_u_2,
    string_surrogate_half_backslash_u_3,

    // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
    string_utf8_last_byte, // State A
    string_utf8_second_to_last_byte, // State B
    string_utf8_second_to_last_byte_guard_against_overlong, // State C
    string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
    string_utf8_third_to_last_byte, // State E
    string_utf8_third_to_last_byte_guard_against_overlong, // State F
    string_utf8_third_to_last_byte_guard_against_too_large, // State G

    literal_t,
    literal_tr,
    literal_tru,
    literal_f,
    literal_fa,
    literal_fal,
    literal_fals,
    literal_n,
    literal_nu,
    literal_nul,
};

/// Returns the byte at the cursor without consuming it, or an error when no byte is available.
fn expectByte(self: *const @This()) !u8 {
    if (self.cursor < self.input.len) {
        return self.input[self.cursor];
    }
    // No byte.
    if (self.is_end_of_input) return error.UnexpectedEndOfInput;
    return error.BufferUnderrun;
}

/// Advances the cursor past JSON whitespace, maintaining line/column diagnostics.
fn skipWhitespace(self: *@This()) void {
    while (self.cursor < self.input.len) : (self.cursor += 1) {
        switch (self.input[self.cursor]) {
            // Whitespace
            ' ', '\t', '\r' => continue,
            '\n' => {
                if (self.diagnostics) |diag| {
                    diag.line_number += 1;
                    // This will count the newline itself,
                    // which means a straight-forward subtraction will give a 1-based column number.
                    diag.line_start_cursor = self.cursor;
                }
                continue;
            },
            else => return,
        }
    }
}

fn skipWhitespaceExpectByte(self: *@This()) !u8 {
    self.skipWhitespace();
    return self.expectByte();
}

/// Skips whitespace; returns true when a complete document has ended cleanly.
fn skipWhitespaceCheckEnd(self: *@This()) !bool {
    self.skipWhitespace();
    if (self.cursor >= self.input.len) {
        // End of buffer.
        if (self.is_end_of_input) {
            // End of everything.
            if (self.stackHeight() == 0) {
                // We did it!
                return true;
            }
            return error.UnexpectedEndOfInput;
        }
        return error.BufferUnderrun;
    }
    if (self.stackHeight() == 0) return error.SyntaxError;
    return false;
}

/// Returns input[value_start..cursor] and resets value_start to cursor.
fn takeValueSlice(self: *@This()) []const u8 {
    const slice = self.input[self.value_start..self.cursor];
    self.value_start = self.cursor;
    return slice;
}
fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 {
    // Check if the escape sequence started before the current input buffer.
    // (The algebra here is awkward to avoid unsigned underflow,
    // but it's just making sure the slice on the next line isn't UB.)
    if (self.cursor <= self.value_start + trailing_negative_offset) return "";
    const slice = self.input[self.value_start .. self.cursor - trailing_negative_offset];
    // When trailing_negative_offset is non-zero, setting self.value_start doesn't matter,
    // because we always set it again while emitting the .partial_string_escaped_*.
    self.value_start = self.cursor;
    return slice;
}

/// Emits the pending number content when the buffer runs out mid-number.
/// `allow_end` is true only in states where the digits seen so far form a complete number.
fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
    const slice = self.takeValueSlice();
    if (self.is_end_of_input) {
        if (!allow_end) return error.UnexpectedEndOfInput;
        self.state = .post_value;
        return Token{ .number = slice };
    }
    if (slice.len == 0) return error.BufferUnderrun;
    return Token{ .partial_number = slice };
}

/// Emits the pending string content when the buffer runs out mid-string,
/// excluding any partially-consumed escape sequence from the partial token.
fn endOfBufferInString(self: *@This()) !Token {
    if (self.is_end_of_input) return error.UnexpectedEndOfInput;
    const slice = self.takeValueSliceMinusTrailingOffset(switch (self.state) {
        // Don't include the escape sequence in the partial string.
        .string_backslash => 1,
        .string_backslash_u => 2,
        .string_backslash_u_1 => 3,
        .string_backslash_u_2 => 4,
        .string_backslash_u_3 => 5,
        .string_surrogate_half => 6,
        .string_surrogate_half_backslash => 7,
        .string_surrogate_half_backslash_u => 8,
        .string_surrogate_half_backslash_u_1 => 9,
        .string_surrogate_half_backslash_u_2 => 10,
        .string_surrogate_half_backslash_u_3 => 11,

        // Include everything up to the cursor otherwise.
        .string,
        .string_utf8_last_byte,
        .string_utf8_second_to_last_byte,
        .string_utf8_second_to_last_byte_guard_against_overlong,
        .string_utf8_second_to_last_byte_guard_against_surrogate_half,
        .string_utf8_third_to_last_byte,
        .string_utf8_third_to_last_byte_guard_against_overlong,
        .string_utf8_third_to_last_byte_guard_against_too_large,
        => 0,

        else => unreachable,
    });
    if (slice.len == 0) return error.BufferUnderrun;
    return Token{ .partial_string = slice };
}

/// Encodes a code point as UTF-8 into a fixed-size partial-string token.
fn partialStringCodepoint(code_point: u21) Token {
    var buf: [4]u8 = undefined;
    switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) {
        1 => return Token{ .partial_string_escaped_1 = buf[0..1].* },
        2 => return Token{ .partial_string_escaped_2 = buf[0..2].* },
        3 => return Token{ .partial_string_escaped_3 = buf[0..3].* },
        4 => return Token{ .partial_string_escaped_4 = buf[0..4].* },
        else => unreachable,
    }
}

/// Scan the input and check for malformed JSON.
/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`.
/// Returns any errors from the allocator as-is, which is unlikely,
/// but can be caused by extreme nesting depth in the input.
pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool {
    var scanner = Scanner.initCompleteInput(allocator, s);
    defer scanner.deinit();

    while (true) {
        const token = scanner.next() catch |err| switch (err) {
            error.SyntaxError, error.UnexpectedEndOfInput => return false,
            error.OutOfMemory => return error.OutOfMemory,
            error.BufferUnderrun => unreachable,
        };
        if (token == .end_of_document) break;
    }

    return true;
}

/// The parsing errors are divided into two categories:
///  * `SyntaxError` is for clearly malformed JSON documents,
///    such as giving an input document that isn't JSON at all.
///  * `UnexpectedEndOfInput` is for signaling that everything's been
///    valid so far, but the input appears to be truncated for some reason.
/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`.
pub const Error = error{ SyntaxError, UnexpectedEndOfInput };

/// Used by `json.reader`.
pub const default_buffer_size = 0x1000;

/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
/// ```
///  <document> = <value> .end_of_document
///  <value> =
///    | <object>
///    | <array>
///    | <number>
///    | <string>
///    | .true
///    | .false
///    | .null
///  <object> = .object_begin ( <string> <value> )* .object_end
///  <array> = .array_begin ( <value> )* .array_end
///  <number> = <it depends>
///  <string> = <it depends>
/// ```
///
/// What you get for `<number>` and `<string>` values depends on which `next*()` method you call:
///
/// ```
/// next():
///  <number> = ( .partial_number )* .number
///  <string> = ( <partial_string> )* .string
///  <partial_string> =
///    | .partial_string
///    | .partial_string_escaped_1
///    | .partial_string_escaped_2
///    | .partial_string_escaped_3
///    | .partial_string_escaped_4
///
/// nextAlloc*(..., .alloc_always):
///  <number> = .allocated_number
///  <string> = .allocated_string
///
/// nextAlloc*(..., .alloc_if_needed):
///  <number> =
///    | .number
///    | .allocated_number
///  <string> =
///    | .string
///    | .allocated_string
/// ```
///
/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value.
/// For number values, this is the representation of the number exactly as it appears in the input.
/// For strings, this is the content of the string after resolving escape sequences.
///
/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator.
/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations.
///
/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences.
/// To get a complete value in memory, you need to concatenate the values yourself.
/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result.
///
/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer.
/// The memory may become undefined during the next call to `json.Scanner.feedInput()`
/// or any `json.Reader` method whose return error set includes `json.Error`.
/// To keep the value persistently, it is recommended to make a copy or to use `.alloc_always`,
/// which makes a copy for you.
///
/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that
/// the previously partial value is completed with no additional bytes.
/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.)
/// `.partial_*` tokens never have `0` length.
///
/// The recommended strategy for using the different `next*()` methods is something like this:
///
/// When you're expecting an object key, use `.alloc_if_needed`.
/// You often don't need a copy of the key string to persist; you might just check which field it is.
/// In the case that the key happens to require an allocation, free it immediately after checking it.
///
/// When you're expecting a meaningful string value (such as on the right of a `:`),
/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document.
///
/// When you're expecting a number value, use `.alloc_if_needed`.
/// You're probably going to be parsing the string representation of the number into a numeric representation,
/// so you need the complete string representation only temporarily.
///
/// When you're skipping an unrecognized value, use `skipValue()`.
+pub const Token = union(enum) { + object_begin, + object_end, + array_begin, + array_end, + + true, + false, + null, + + number: []const u8, + partial_number: []const u8, + allocated_number: []u8, + + string: []const u8, + partial_string: []const u8, + partial_string_escaped_1: [1]u8, + partial_string_escaped_2: [2]u8, + partial_string_escaped_3: [3]u8, + partial_string_escaped_4: [4]u8, + allocated_string: []u8, + + end_of_document, +}; + +/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call. +pub const TokenType = enum { + object_begin, + object_end, + array_begin, + array_end, + true, + false, + null, + number, + string, + end_of_document, +}; + +/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);` +/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized. +/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()` +/// to get meaningful information from this. +pub const Diagnostics = struct { + line_number: u64 = 1, + line_start_cursor: usize = @as(usize, @bitCast(@as(isize, -1))), // Start just "before" the input buffer to get a 1-based column for line 1. + total_bytes_before_current_input: u64 = 0, + cursor_pointer: *const usize = undefined, + + /// Starts at 1. + pub fn getLine(self: *const @This()) u64 { + return self.line_number; + } + /// Starts at 1. + pub fn getColumn(self: *const @This()) u64 { + return self.cursor_pointer.* -% self.line_start_cursor; + } + /// Starts at 0. Measures the byte offset since the start of the input. + pub fn getByteOffset(self: *const @This()) u64 { + return self.total_bytes_before_current_input + self.cursor_pointer.*; + } +}; + +/// See the documentation for `std.json.Token`. 
+pub const AllocWhen = enum { alloc_if_needed, alloc_always }; + +/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default. +/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`. +pub const default_max_value_len = 4 * 1024 * 1024; + +/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader. +pub const Reader = struct { + scanner: Scanner, + reader: *std.Io.Reader, + + /// The allocator is only used to track `[]` and `{}` nesting levels. + pub fn init(allocator: Allocator, io_reader: *std.Io.Reader) @This() { + return .{ + .scanner = Scanner.initStreaming(allocator), + .reader = io_reader, + }; + } + pub fn deinit(self: *@This()) void { + self.scanner.deinit(); + self.* = undefined; + } + + /// Calls `std.json.Scanner.enableDiagnostics`. + pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { + self.scanner.enableDiagnostics(diagnostics); + } + + pub const NextError = std.Io.Reader.Error || Error || Allocator.Error; + pub const SkipError = Reader.NextError; + pub const AllocError = Reader.NextError || error{ValueTooLong}; + pub const PeekError = std.Io.Reader.Error || Error; + + /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` + /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. + pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) Reader.AllocError!Token { + return self.nextAllocMax(allocator, when, default_max_value_len); + } + /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. 
+ pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) Reader.AllocError!Token { + const token_type = try self.peekNextTokenType(); + switch (token_type) { + .number, .string => { + var value_list = ArrayList(u8).init(allocator); + errdefer { + value_list.deinit(); + } + if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| { + return if (token_type == .number) + Token{ .number = slice } + else + Token{ .string = slice }; + } else { + return if (token_type == .number) + Token{ .allocated_number = try value_list.toOwnedSlice() } + else + Token{ .allocated_string = try value_list.toOwnedSlice() }; + } + }, + + // Simple tokens never alloc. + .object_begin, + .object_end, + .array_begin, + .array_end, + .true, + .false, + .null, + .end_of_document, + => return try self.next(), + } + } + + /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` + pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) Reader.AllocError!?[]const u8 { + return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); + } + /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`. + pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) Reader.AllocError!?[]const u8 { + while (true) { + return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`. 
+ pub fn skipValue(self: *@This()) Reader.SkipError!void { + switch (try self.peekNextTokenType()) { + .object_begin, .array_begin => { + try self.skipUntilStackHeight(self.stackHeight()); + }, + .number, .string => { + while (true) { + switch (try self.next()) { + .partial_number, + .partial_string, + .partial_string_escaped_1, + .partial_string_escaped_2, + .partial_string_escaped_3, + .partial_string_escaped_4, + => continue, + + .number, .string => break, + + else => unreachable, + } + } + }, + .true, .false, .null => { + _ = try self.next(); + }, + + .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. + } + } + /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`. + pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) Reader.NextError!void { + while (true) { + return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// Calls `std.json.Scanner.stackHeight`. + pub fn stackHeight(self: *const @This()) usize { + return self.scanner.stackHeight(); + } + /// Calls `std.json.Scanner.ensureTotalStackCapacity`. + pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void { + try self.scanner.ensureTotalStackCapacity(height); + } + + /// See `std.json.Token` for documentation of this function. + pub fn next(self: *@This()) Reader.NextError!Token { + while (true) { + return self.scanner.next() catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// See `std.json.Scanner.peekNextTokenType()`. 
+ pub fn peekNextTokenType(self: *@This()) Reader.PeekError!TokenType { + while (true) { + return self.scanner.peekNextTokenType() catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + fn refillBuffer(self: *@This()) std.Io.Reader.Error!void { + const input = self.reader.peekGreedy(1) catch |err| switch (err) { + error.ReadFailed => return error.ReadFailed, + error.EndOfStream => return self.scanner.endInput(), + }; + self.reader.toss(input.len); + self.scanner.feedInput(input); + } +}; + +const OBJECT_MODE = 0; +const ARRAY_MODE = 1; + +fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void { + const new_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong; + if (new_len > max_value_len) return error.ValueTooLong; + try list.appendSlice(buf); +} + +/// For the slice you get from a `Token.number` or `Token.allocated_number`, +/// this function returns true if the number doesn't contain any fraction or exponent components, and is not `-0`. +/// Note, the numeric value encoded by the value may still be an integer, such as `1.0`. +/// This function is meant to give a hint about whether integer parsing or float parsing should be used on the value. +/// This function will not give meaningful results on non-numeric input. 
+pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool { + if (std.mem.eql(u8, value, "-0")) return false; + return std.mem.indexOfAny(u8, value, ".eE") == null; +} + +test { + _ = @import("./scanner_test.zig"); +} diff --git a/lib/std/json/Stringify.zig b/lib/std/json/Stringify.zig index eb56fd6437..4d79217a87 100644 --- a/lib/std/json/Stringify.zig +++ b/lib/std/json/Stringify.zig @@ -248,7 +248,7 @@ test print { \\ ] \\} ; - try std.testing.expectEqualStrings(expected, out.getWritten()); + try std.testing.expectEqualStrings(expected, out.buffered()); } /// An alternative to calling `write` that allows you to write directly to the `.writer` field, e.g. with `.writer.writeAll()`. @@ -577,7 +577,7 @@ pub fn value(v: anytype, options: Options, writer: *Writer) Error!void { test value { var out: std.io.Writer.Allocating = .init(std.testing.allocator); - const writer = &out.interface; + const writer = &out.writer; defer out.deinit(); const T = struct { a: i32, b: []const u8 }; @@ -617,9 +617,8 @@ test value { /// Caller owns returned memory. pub fn valueAlloc(gpa: Allocator, v: anytype, options: Options) error{OutOfMemory}![]u8 { var aw: std.io.Writer.Allocating = .init(gpa); - const writer = &aw.interface; defer aw.deinit(); - value(v, options, writer) catch return error.OutOfMemory; + value(v, options, &aw.writer) catch return error.OutOfMemory; return aw.toOwnedSlice(); } @@ -634,23 +633,23 @@ test valueAlloc { try std.testing.expectEqualStrings(expected, actual); } -fn outputUnicodeEscape(codepoint: u21, bw: *Writer) Error!void { +fn outputUnicodeEscape(codepoint: u21, w: *Writer) Error!void { if (codepoint <= 0xFFFF) { // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF), // then it may be represented as a six-character sequence: a reverse solidus, followed // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point. 
- try bw.writeAll("\\u"); - try bw.printInt("x", .{ .width = 4, .fill = '0' }, codepoint); + try w.writeAll("\\u"); + try w.printInt(codepoint, 16, .lower, .{ .width = 4, .fill = '0' }); } else { assert(codepoint <= 0x10FFFF); // To escape an extended character that is not in the Basic Multilingual Plane, // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair. const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800; const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00; - try bw.writeAll("\\u"); - try bw.printInt("x", .{ .width = 4, .fill = '0' }, high); - try bw.writeAll("\\u"); - try bw.printInt("x", .{ .width = 4, .fill = '0' }, low); + try w.writeAll("\\u"); + try w.printInt(high, 16, .lower, .{ .width = 4, .fill = '0' }); + try w.writeAll("\\u"); + try w.printInt(low, 16, .lower, .{ .width = 4, .fill = '0' }); } } @@ -723,8 +722,8 @@ test "json write stream" { try testBasicWriteStream(&w); } -fn testBasicWriteStream(w: *Stringify) Error!void { - w.writer.reset(); +fn testBasicWriteStream(w: *Stringify) !void { + w.writer.end = 0; try w.beginObject(); @@ -755,19 +754,19 @@ fn testBasicWriteStream(w: *Stringify) Error!void { \\{ \\ "object": { \\ "one": 1, - \\ "two": 2e0 + \\ "two": 2 \\ }, \\ "string": "This is a string", \\ "array": [ \\ "Another string", \\ 1, - \\ 3.5e0 + \\ 3.5 \\ ], \\ "int": 10, - \\ "float": 3.5e0 + \\ "float": 3.5 \\} ; - try std.testing.expectEqualStrings(expected, w.writer.getWritten()); + try std.testing.expectEqualStrings(expected, w.writer.buffered()); } fn getJsonObject(allocator: std.mem.Allocator) !std.json.Value { @@ -804,12 +803,12 @@ test "stringify basic types" { try testStringify("null", @as(?u8, null), .{}); try testStringify("null", @as(?*u32, null), .{}); try testStringify("42", 42, .{}); - try testStringify("4.2e1", 42.0, .{}); + try testStringify("42", 42.0, .{}); try testStringify("42", @as(u8, 42), .{}); try testStringify("42", @as(u128, 42), .{}); try 
testStringify("9999999999999999", 9999999999999999, .{}); - try testStringify("4.2e1", @as(f32, 42), .{}); - try testStringify("4.2e1", @as(f64, 42), .{}); + try testStringify("42", @as(f32, 42), .{}); + try testStringify("42", @as(f64, 42), .{}); try testStringify("\"ItBroke\"", @as(anyerror, error.ItBroke), .{}); try testStringify("\"ItBroke\"", error.ItBroke, .{}); } @@ -970,9 +969,9 @@ test "stringify struct with custom stringifier" { fn testStringify(expected: []const u8, v: anytype, options: Options) !void { var buffer: [4096]u8 = undefined; - var bw: Writer = .fixed(&buffer); - try value(v, options, &bw); - try std.testing.expectEqualStrings(expected, bw.getWritten()); + var w: Writer = .fixed(&buffer); + try value(v, options, &w); + try std.testing.expectEqualStrings(expected, w.buffered()); } test "raw streaming" { @@ -996,5 +995,5 @@ test "raw streaming" { \\ "long key": "long value" \\} ; - try std.testing.expectEqualStrings(expected, w.writer.getWritten()); + try std.testing.expectEqualStrings(expected, w.writer.buffered()); } diff --git a/lib/std/json/dynamic.zig b/lib/std/json/dynamic.zig index 4754bdb859..b47e7e1067 100644 --- a/lib/std/json/dynamic.zig +++ b/lib/std/json/dynamic.zig @@ -9,10 +9,7 @@ const json = std.json; const ParseOptions = @import("./static.zig").ParseOptions; const ParseError = @import("./static.zig").ParseError; -const JsonScanner = @import("./scanner.zig").Scanner; -const AllocWhen = @import("./scanner.zig").AllocWhen; -const Token = @import("./scanner.zig").Token; -const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; +const isNumberFormattedLikeAnInteger = @import("Scanner.zig").isNumberFormattedLikeAnInteger; pub const ObjectMap = StringArrayHashMap(Value); pub const Array = ArrayList(Value); diff --git a/lib/std/json/dynamic_test.zig b/lib/std/json/dynamic_test.zig index a988e2c2c3..9d991f2c50 100644 --- a/lib/std/json/dynamic_test.zig +++ b/lib/std/json/dynamic_test.zig @@ -16,8 
+16,7 @@ const parseFromTokenSource = @import("static.zig").parseFromTokenSource; const parseFromValueLeaky = @import("static.zig").parseFromValueLeaky; const ParseOptions = @import("static.zig").ParseOptions; -const jsonReader = @import("scanner.zig").reader; -const JsonReader = @import("scanner.zig").Reader; +const Scanner = @import("Scanner.zig"); test "json.parser.dynamic" { const s = @@ -99,8 +98,8 @@ test "write json then parse it" { try jw.endObject(); - var fbs: std.io.FixedBufferStream = .{ .buffer = fixed_writer.getWritten() }; - var json_reader = jsonReader(testing.allocator, fbs.reader()); + var fbs: std.Io.Reader = .fixed(fixed_writer.buffered()); + var json_reader: Scanner.Reader = .init(testing.allocator, &fbs); defer json_reader.deinit(); var parsed = try parseFromTokenSource(Value, testing.allocator, &json_reader, .{}); defer parsed.deinit(); @@ -263,7 +262,7 @@ test "Value.jsonStringify" { \\ } \\] ; - try testing.expectEqualStrings(expected, fixed_writer.getWritten()); + try testing.expectEqualStrings(expected, fixed_writer.buffered()); } test "parseFromValue(std.json.Value,...)" { @@ -331,8 +330,8 @@ test "polymorphic parsing" { test "long object value" { const value = "01234567890123456789"; const doc = "{\"key\":\"" ++ value ++ "\"}"; - var fbs: std.io.FixedBufferStream = .{ .buffer = doc }; - var reader = smallBufferJsonReader(testing.allocator, fbs.reader()); + var fbs: std.Io.Reader = .fixed(doc); + var reader = smallBufferJsonReader(testing.allocator, &fbs); defer reader.deinit(); var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{}); defer parsed.deinit(); @@ -364,8 +363,8 @@ test "many object keys" { \\ "k5": "v5" \\} ; - var fbs: std.io.FixedBufferStream = .{ .buffer = doc }; - var reader = smallBufferJsonReader(testing.allocator, fbs.reader()); + var fbs: std.Io.Reader = .fixed(doc); + var reader = smallBufferJsonReader(testing.allocator, &fbs); defer reader.deinit(); var parsed = try parseFromTokenSource(Value, 
testing.allocator, &reader, .{}); defer parsed.deinit(); @@ -379,8 +378,8 @@ test "many object keys" { test "negative zero" { const doc = "-0"; - var fbs: std.io.FixedBufferStream = .{ .buffer = doc }; - var reader = smallBufferJsonReader(testing.allocator, fbs.reader()); + var fbs: std.Io.Reader = .fixed(doc); + var reader = smallBufferJsonReader(testing.allocator, &fbs); defer reader.deinit(); var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{}); defer parsed.deinit(); @@ -388,6 +387,6 @@ test "negative zero" { try testing.expect(std.math.isNegativeZero(parsed.value.float)); } -fn smallBufferJsonReader(allocator: Allocator, io_reader: anytype) JsonReader(16, @TypeOf(io_reader)) { - return JsonReader(16, @TypeOf(io_reader)).init(allocator, io_reader); +fn smallBufferJsonReader(allocator: Allocator, io_reader: anytype) Scanner.Reader { + return .init(allocator, io_reader); } diff --git a/lib/std/json/hashmap_test.zig b/lib/std/json/hashmap_test.zig index 4614d751bd..0544eaa68b 100644 --- a/lib/std/json/hashmap_test.zig +++ b/lib/std/json/hashmap_test.zig @@ -10,7 +10,7 @@ const parseFromTokenSource = @import("static.zig").parseFromTokenSource; const parseFromValue = @import("static.zig").parseFromValue; const Value = @import("dynamic.zig").Value; -const jsonReader = @import("./scanner.zig").reader; +const Scanner = @import("Scanner.zig"); const T = struct { i: i32, @@ -39,8 +39,8 @@ test "parse json hashmap while streaming" { \\ "xyz": {"i": 1, "s": "w"} \\} ; - var stream: std.io.FixedBufferStream = .{ .buffer = doc }; - var json_reader = jsonReader(testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(doc); + var json_reader: Scanner.Reader = .init(testing.allocator, &stream); var parsed = try parseFromTokenSource( ArrayHashMap(T), diff --git a/lib/std/json/scanner.zig b/lib/std/json/scanner.zig deleted file mode 100644 index 1836d6775b..0000000000 --- a/lib/std/json/scanner.zig +++ /dev/null @@ -1,1776 +0,0 @@ -// 
Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259 -// * RFC 8259 requires JSON documents be valid UTF-8, -// but makes an allowance for systems that are "part of a closed ecosystem". -// I have no idea what that's supposed to mean in the context of a standard specification. -// This implementation requires inputs to be valid UTF-8. -// * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits, -// but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed. -// (RFC 5234 defines HEXDIG to only allow uppercase.) -// * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value". -// See http://www.unicode.org/glossary/#unicode_scalar_value . -// * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences, -// but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?), -// which would mean that unpaired surrogate halves are forbidden. -// By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to) -// explicitly allows unpaired surrogate halves. -// This implementation forbids unpaired surrogate halves in \u sequences. -// If a high surrogate half appears in a \u sequence, -// then a low surrogate half must immediately follow in \u notation. -// * RFC 8259 allows implementations to "accept non-JSON forms or extensions". -// This implementation does not accept any of that. -// * RFC 8259 allows implementations to put limits on "the size of texts", -// "the maximum depth of nesting", "the range and precision of numbers", -// and "the length and character contents of strings". -// This low-level implementation does not limit these, -// except where noted above, and except that nesting depth requires memory allocation. 
-// Note that this low-level API does not interpret numbers numerically, -// but simply emits their source form for some higher level code to make sense of. -// * This low-level implementation allows duplicate object keys, -// and key/value pairs are emitted in the order they appear in the input. - -const std = @import("std"); - -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; -const assert = std.debug.assert; -const BitStack = std.BitStack; - -/// Scan the input and check for malformed JSON. -/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`. -/// Returns any errors from the allocator as-is, which is unlikely, -/// but can be caused by extreme nesting depth in the input. -pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool { - var scanner = Scanner.initCompleteInput(allocator, s); - defer scanner.deinit(); - - while (true) { - const token = scanner.next() catch |err| switch (err) { - error.SyntaxError, error.UnexpectedEndOfInput => return false, - error.OutOfMemory => return error.OutOfMemory, - error.BufferUnderrun => unreachable, - }; - if (token == .end_of_document) break; - } - - return true; -} - -/// The parsing errors are divided into two categories: -/// * `SyntaxError` is for clearly malformed JSON documents, -/// such as giving an input document that isn't JSON at all. -/// * `UnexpectedEndOfInput` is for signaling that everything's been -/// valid so far, but the input appears to be truncated for some reason. -/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`. -pub const Error = error{ SyntaxError, UnexpectedEndOfInput }; - -/// Calls `std.json.Reader` with `std.json.default_buffer_size`. -pub fn reader(allocator: Allocator, io_reader: anytype) Reader(default_buffer_size, @TypeOf(io_reader)) { - return Reader(default_buffer_size, @TypeOf(io_reader)).init(allocator, io_reader); -} -/// Used by `json.reader`. 
-pub const default_buffer_size = 0x1000; - -/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar: -/// ``` -/// = .end_of_document -/// = -/// | -/// | -/// | -/// | -/// | .true -/// | .false -/// | .null -/// = .object_begin ( )* .object_end -/// = .array_begin ( )* .array_end -/// = -/// = -/// ``` -/// -/// What you get for `` and `` values depends on which `next*()` method you call: -/// -/// ``` -/// next(): -/// = ( .partial_number )* .number -/// = ( )* .string -/// = -/// | .partial_string -/// | .partial_string_escaped_1 -/// | .partial_string_escaped_2 -/// | .partial_string_escaped_3 -/// | .partial_string_escaped_4 -/// -/// nextAlloc*(..., .alloc_always): -/// = .allocated_number -/// = .allocated_string -/// -/// nextAlloc*(..., .alloc_if_needed): -/// = -/// | .number -/// | .allocated_number -/// = -/// | .string -/// | .allocated_string -/// ``` -/// -/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value. -/// For number values, this is the representation of the number exactly as it appears in the input. -/// For strings, this is the content of the string after resolving escape sequences. -/// -/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator. -/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations. -/// -/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences. -/// To get a complete value in memory, you need to concatenate the values yourself. -/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result. -/// -/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer. 
-/// The memory may become undefined during the next call to `json.Scanner.feedInput()` -/// or any `json.Reader` method whose return error set includes `json.Error`. -/// To keep the value persistently, it recommended to make a copy or to use `.alloc_always`, -/// which makes a copy for you. -/// -/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that -/// the previously partial value is completed with no additional bytes. -/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.) -/// `.partial_*` tokens never have `0` length. -/// -/// The recommended strategy for using the different `next*()` methods is something like this: -/// -/// When you're expecting an object key, use `.alloc_if_needed`. -/// You often don't need a copy of the key string to persist; you might just check which field it is. -/// In the case that the key happens to require an allocation, free it immediately after checking it. -/// -/// When you're expecting a meaningful string value (such as on the right of a `:`), -/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document. -/// -/// When you're expecting a number value, use `.alloc_if_needed`. -/// You're probably going to be parsing the string representation of the number into a numeric representation, -/// so you need the complete string representation only temporarily. -/// -/// When you're skipping an unrecognized value, use `skipValue()`. 
-pub const Token = union(enum) { - object_begin, - object_end, - array_begin, - array_end, - - true, - false, - null, - - number: []const u8, - partial_number: []const u8, - allocated_number: []u8, - - string: []const u8, - partial_string: []const u8, - partial_string_escaped_1: [1]u8, - partial_string_escaped_2: [2]u8, - partial_string_escaped_3: [3]u8, - partial_string_escaped_4: [4]u8, - allocated_string: []u8, - - end_of_document, -}; - -/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call. -pub const TokenType = enum { - object_begin, - object_end, - array_begin, - array_end, - true, - false, - null, - number, - string, - end_of_document, -}; - -/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);` -/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized. -/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()` -/// to get meaningful information from this. -pub const Diagnostics = struct { - line_number: u64 = 1, - line_start_cursor: usize = @as(usize, @bitCast(@as(isize, -1))), // Start just "before" the input buffer to get a 1-based column for line 1. - total_bytes_before_current_input: u64 = 0, - cursor_pointer: *const usize = undefined, - - /// Starts at 1. - pub fn getLine(self: *const @This()) u64 { - return self.line_number; - } - /// Starts at 1. - pub fn getColumn(self: *const @This()) u64 { - return self.cursor_pointer.* -% self.line_start_cursor; - } - /// Starts at 0. Measures the byte offset since the start of the input. - pub fn getByteOffset(self: *const @This()) u64 { - return self.total_bytes_before_current_input + self.cursor_pointer.*; - } -}; - -/// See the documentation for `std.json.Token`. 
-pub const AllocWhen = enum { alloc_if_needed, alloc_always }; - -/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default. -/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`. -pub const default_max_value_len = 4 * 1024 * 1024; - -/// Connects a `std.io.GenericReader` to a `std.json.Scanner`. -/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader. -pub fn Reader(comptime buffer_size: usize, comptime ReaderType: type) type { - return struct { - scanner: Scanner, - reader: ReaderType, - - buffer: [buffer_size]u8 = undefined, - - /// The allocator is only used to track `[]` and `{}` nesting levels. - pub fn init(allocator: Allocator, io_reader: ReaderType) @This() { - return .{ - .scanner = Scanner.initStreaming(allocator), - .reader = io_reader, - }; - } - pub fn deinit(self: *@This()) void { - self.scanner.deinit(); - self.* = undefined; - } - - /// Calls `std.json.Scanner.enableDiagnostics`. - pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { - self.scanner.enableDiagnostics(diagnostics); - } - - pub const NextError = ReaderType.Error || Error || Allocator.Error; - pub const SkipError = NextError; - pub const AllocError = NextError || error{ValueTooLong}; - pub const PeekError = ReaderType.Error || Error; - - /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. - pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token { - return self.nextAllocMax(allocator, when, default_max_value_len); - } - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. 
- pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token { - const token_type = try self.peekNextTokenType(); - switch (token_type) { - .number, .string => { - var value_list = ArrayList(u8).init(allocator); - errdefer { - value_list.deinit(); - } - if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| { - return if (token_type == .number) - Token{ .number = slice } - else - Token{ .string = slice }; - } else { - return if (token_type == .number) - Token{ .allocated_number = try value_list.toOwnedSlice() } - else - Token{ .allocated_string = try value_list.toOwnedSlice() }; - } - }, - - // Simple tokens never alloc. - .object_begin, - .object_end, - .array_begin, - .array_end, - .true, - .false, - .null, - .end_of_document, - => return try self.next(), - } - } - - /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` - pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocError!?[]const u8 { - return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); - } - /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`. - pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocError!?[]const u8 { - while (true) { - return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`. 
- pub fn skipValue(self: *@This()) SkipError!void { - switch (try self.peekNextTokenType()) { - .object_begin, .array_begin => { - try self.skipUntilStackHeight(self.stackHeight()); - }, - .number, .string => { - while (true) { - switch (try self.next()) { - .partial_number, - .partial_string, - .partial_string_escaped_1, - .partial_string_escaped_2, - .partial_string_escaped_3, - .partial_string_escaped_4, - => continue, - - .number, .string => break, - - else => unreachable, - } - } - }, - .true, .false, .null => { - _ = try self.next(); - }, - - .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. - } - } - /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`. - pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void { - while (true) { - return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - /// Calls `std.json.Scanner.stackHeight`. - pub fn stackHeight(self: *const @This()) usize { - return self.scanner.stackHeight(); - } - /// Calls `std.json.Scanner.ensureTotalStackCapacity`. - pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void { - try self.scanner.ensureTotalStackCapacity(height); - } - - /// See `std.json.Token` for documentation of this function. - pub fn next(self: *@This()) NextError!Token { - while (true) { - return self.scanner.next() catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - /// See `std.json.Scanner.peekNextTokenType()`. 
- pub fn peekNextTokenType(self: *@This()) PeekError!TokenType { - while (true) { - return self.scanner.peekNextTokenType() catch |err| switch (err) { - error.BufferUnderrun => { - try self.refillBuffer(); - continue; - }, - else => |other_err| return other_err, - }; - } - } - - fn refillBuffer(self: *@This()) ReaderType.Error!void { - const input = self.buffer[0..try self.reader.read(self.buffer[0..])]; - if (input.len > 0) { - self.scanner.feedInput(input); - } else { - self.scanner.endInput(); - } - } - }; -} - -/// The lowest level parsing API in this package; -/// supports streaming input with a low memory footprint. -/// The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input. -/// Specifically `d/8` bytes are required for this purpose, -/// with some extra buffer according to the implementation of `std.ArrayList`. -/// -/// This scanner can emit partial tokens; see `std.json.Token`. -/// The input to this class is a sequence of input buffers that you must supply one at a time. -/// Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned. -/// Then call `feedInput()` again and so forth. -/// Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`, -/// or when `error.BufferUnderrun` requests more data and there is no more. -/// Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned. -pub const Scanner = struct { - state: State = .value, - string_is_object_key: bool = false, - stack: BitStack, - value_start: usize = undefined, - utf16_code_units: [2]u16 = undefined, - - input: []const u8 = "", - cursor: usize = 0, - is_end_of_input: bool = false, - diagnostics: ?*Diagnostics = null, - - /// The allocator is only used to track `[]` and `{}` nesting levels. 
- pub fn initStreaming(allocator: Allocator) @This() { - return .{ - .stack = BitStack.init(allocator), - }; - } - /// Use this if your input is a single slice. - /// This is effectively equivalent to: - /// ``` - /// initStreaming(allocator); - /// feedInput(complete_input); - /// endInput(); - /// ``` - pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() { - return .{ - .stack = BitStack.init(allocator), - .input = complete_input, - .is_end_of_input = true, - }; - } - pub fn deinit(self: *@This()) void { - self.stack.deinit(); - self.* = undefined; - } - - pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { - diagnostics.cursor_pointer = &self.cursor; - self.diagnostics = diagnostics; - } - - /// Call this whenever you get `error.BufferUnderrun` from `next()`. - /// When there is no more input to provide, call `endInput()`. - pub fn feedInput(self: *@This(), input: []const u8) void { - assert(self.cursor == self.input.len); // Not done with the last input slice. - if (self.diagnostics) |diag| { - diag.total_bytes_before_current_input += self.input.len; - // This usually goes "negative" to measure how far before the beginning - // of the new buffer the current line started. - diag.line_start_cursor -%= self.cursor; - } - self.input = input; - self.cursor = 0; - self.value_start = 0; - } - /// Call this when you will no longer call `feedInput()` anymore. - /// This can be called either immediately after the last `feedInput()`, - /// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`. - /// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`. 
- pub fn endInput(self: *@This()) void { - self.is_end_of_input = true; - } - - pub const NextError = Error || Allocator.Error || error{BufferUnderrun}; - pub const AllocError = Error || Allocator.Error || error{ValueTooLong}; - pub const PeekError = Error || error{BufferUnderrun}; - pub const SkipError = Error || Allocator.Error; - pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun}; - - /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` - /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. - pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token { - return self.nextAllocMax(allocator, when, default_max_value_len); - } - - /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. - /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. - pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token { - assert(self.is_end_of_input); // This function is not available in streaming mode. 
- const token_type = self.peekNextTokenType() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }; - switch (token_type) { - .number, .string => { - var value_list = ArrayList(u8).init(allocator); - errdefer { - value_list.deinit(); - } - if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }) |slice| { - return if (token_type == .number) - Token{ .number = slice } - else - Token{ .string = slice }; - } else { - return if (token_type == .number) - Token{ .allocated_number = try value_list.toOwnedSlice() } - else - Token{ .allocated_string = try value_list.toOwnedSlice() }; - } - }, - - // Simple tokens never alloc. - .object_begin, - .object_end, - .array_begin, - .array_end, - .true, - .false, - .null, - .end_of_document, - => return self.next() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }, - } - } - - /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` - pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 { - return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); - } - /// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`. - /// When allocation is not necessary with `.alloc_if_needed`, - /// this method returns the content slice from the input buffer, and `value_list` is not touched. - /// When allocation is necessary or with `.alloc_always`, this method concatenates partial tokens into the given `value_list`, - /// and returns `null` once the final `.number` or `.string` token has been written into it. - /// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list. 
- /// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation - /// can be resumed by passing the same array list in again. - /// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type; - /// the caller of this method is expected to know which type of token is being processed. - pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 { - while (true) { - const token = try self.next(); - switch (token) { - // Accumulate partial values. - .partial_number, .partial_string => |slice| { - try appendSlice(value_list, slice, max_value_len); - }, - .partial_string_escaped_1 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - .partial_string_escaped_2 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - .partial_string_escaped_3 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - .partial_string_escaped_4 => |buf| { - try appendSlice(value_list, buf[0..], max_value_len); - }, - - // Return complete values. - .number => |slice| { - if (when == .alloc_if_needed and value_list.items.len == 0) { - // No alloc necessary. - return slice; - } - try appendSlice(value_list, slice, max_value_len); - // The token is complete. - return null; - }, - .string => |slice| { - if (when == .alloc_if_needed and value_list.items.len == 0) { - // No alloc necessary. - return slice; - } - try appendSlice(value_list, slice, max_value_len); - // The token is complete. - return null; - }, - - .object_begin, - .object_end, - .array_begin, - .array_end, - .true, - .false, - .null, - .end_of_document, - => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this. 
- - .allocated_number, .allocated_string => unreachable, - } - } - } - - /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. - /// If the next token type is `.object_begin` or `.array_begin`, - /// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found. - /// If the next token type is `.number` or `.string`, - /// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found. - /// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once. - /// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`; - /// see `peekNextTokenType()`. - pub fn skipValue(self: *@This()) SkipError!void { - assert(self.is_end_of_input); // This function is not available in streaming mode. - switch (self.peekNextTokenType() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }) { - .object_begin, .array_begin => { - self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }; - }, - .number, .string => { - while (true) { - switch (self.next() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }) { - .partial_number, - .partial_string, - .partial_string_escaped_1, - .partial_string_escaped_2, - .partial_string_escaped_3, - .partial_string_escaped_4, - => continue, - - .number, .string => break, - - else => unreachable, - } - } - }, - .true, .false, .null => { - _ = self.next() catch |e| switch (e) { - error.BufferUnderrun => unreachable, - else => |err| return err, - }; - }, - - .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. - } - } - - /// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal the given stack height. 
- /// Unlike `skipValue()`, this function is available in streaming mode. - pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void { - while (true) { - switch (try self.next()) { - .object_end, .array_end => { - if (self.stackHeight() == terminal_stack_height) break; - }, - .end_of_document => unreachable, - else => continue, - } - } - } - - /// The depth of `{}` or `[]` nesting levels at the current position. - pub fn stackHeight(self: *const @This()) usize { - return self.stack.bit_len; - } - - /// Pre allocate memory to hold the given number of nesting levels. - /// `stackHeight()` up to the given number will not cause allocations. - pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void { - try self.stack.ensureTotalCapacity(height); - } - - /// See `std.json.Token` for documentation of this function. - pub fn next(self: *@This()) NextError!Token { - state_loop: while (true) { - switch (self.state) { - .value => { - switch (try self.skipWhitespaceExpectByte()) { - // Object, Array - '{' => { - try self.stack.push(OBJECT_MODE); - self.cursor += 1; - self.state = .object_start; - return .object_begin; - }, - '[' => { - try self.stack.push(ARRAY_MODE); - self.cursor += 1; - self.state = .array_start; - return .array_begin; - }, - - // String - '"' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - continue :state_loop; - }, - - // Number - '1'...'9' => { - self.value_start = self.cursor; - self.cursor += 1; - self.state = .number_int; - continue :state_loop; - }, - '0' => { - self.value_start = self.cursor; - self.cursor += 1; - self.state = .number_leading_zero; - continue :state_loop; - }, - '-' => { - self.value_start = self.cursor; - self.cursor += 1; - self.state = .number_minus; - continue :state_loop; - }, - - // literal values - 't' => { - self.cursor += 1; - self.state = .literal_t; - continue :state_loop; - }, - 'f' => { - self.cursor += 1; - self.state = 
.literal_f; - continue :state_loop; - }, - 'n' => { - self.cursor += 1; - self.state = .literal_n; - continue :state_loop; - }, - - else => return error.SyntaxError, - } - }, - - .post_value => { - if (try self.skipWhitespaceCheckEnd()) return .end_of_document; - - const c = self.input[self.cursor]; - if (self.string_is_object_key) { - self.string_is_object_key = false; - switch (c) { - ':' => { - self.cursor += 1; - self.state = .value; - continue :state_loop; - }, - else => return error.SyntaxError, - } - } - - switch (c) { - '}' => { - if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError; - self.cursor += 1; - // stay in .post_value state. - return .object_end; - }, - ']' => { - if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError; - self.cursor += 1; - // stay in .post_value state. - return .array_end; - }, - ',' => { - switch (self.stack.peek()) { - OBJECT_MODE => { - self.state = .object_post_comma; - }, - ARRAY_MODE => { - self.state = .value; - }, - } - self.cursor += 1; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - - .object_start => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - self.string_is_object_key = true; - continue :state_loop; - }, - '}' => { - self.cursor += 1; - _ = self.stack.pop(); - self.state = .post_value; - return .object_end; - }, - else => return error.SyntaxError, - } - }, - .object_post_comma => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - self.string_is_object_key = true; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - - .array_start => { - switch (try self.skipWhitespaceExpectByte()) { - ']' => { - self.cursor += 1; - _ = self.stack.pop(); - self.state = .post_value; - return .array_end; - }, - else => { - self.state = .value; - continue :state_loop; - }, - } - }, - - 
.number_minus => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0' => { - self.cursor += 1; - self.state = .number_leading_zero; - continue :state_loop; - }, - '1'...'9' => { - self.cursor += 1; - self.state = .number_int; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_leading_zero => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true); - switch (self.input[self.cursor]) { - '.' => { - self.cursor += 1; - self.state = .number_post_dot; - continue :state_loop; - }, - 'e', 'E' => { - self.cursor += 1; - self.state = .number_post_e; - continue :state_loop; - }, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - }, - .number_int => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - '0'...'9' => continue, - '.' => { - self.cursor += 1; - self.state = .number_post_dot; - continue :state_loop; - }, - 'e', 'E' => { - self.cursor += 1; - self.state = .number_post_e; - continue :state_loop; - }, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - } - return self.endOfBufferInNumber(true); - }, - .number_post_dot => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0'...'9' => { - self.cursor += 1; - self.state = .number_frac; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_frac => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - '0'...'9' => continue, - 'e', 'E' => { - self.cursor += 1; - self.state = .number_post_e; - continue :state_loop; - }, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - } - return self.endOfBufferInNumber(true); - }, - .number_post_e => { - if (self.cursor >= 
self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0'...'9' => { - self.cursor += 1; - self.state = .number_exp; - continue :state_loop; - }, - '+', '-' => { - self.cursor += 1; - self.state = .number_post_e_sign; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_post_e_sign => { - if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); - switch (self.input[self.cursor]) { - '0'...'9' => { - self.cursor += 1; - self.state = .number_exp; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .number_exp => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - '0'...'9' => continue, - else => { - self.state = .post_value; - return Token{ .number = self.takeValueSlice() }; - }, - } - } - return self.endOfBufferInNumber(true); - }, - - .string => { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string. - - // ASCII plain text. - 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue, - - // Special characters. - '"' => { - const result = Token{ .string = self.takeValueSlice() }; - self.cursor += 1; - self.state = .post_value; - return result; - }, - '\\' => { - const slice = self.takeValueSlice(); - self.cursor += 1; - self.state = .string_backslash; - if (slice.len > 0) return Token{ .partial_string = slice }; - continue :state_loop; - }, - - // UTF-8 validation. 
- // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String - 0xC2...0xDF => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - 0xE0 => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte_guard_against_overlong; - continue :state_loop; - }, - 0xE1...0xEC, 0xEE...0xEF => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - 0xED => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half; - continue :state_loop; - }, - 0xF0 => { - self.cursor += 1; - self.state = .string_utf8_third_to_last_byte_guard_against_overlong; - continue :state_loop; - }, - 0xF1...0xF3 => { - self.cursor += 1; - self.state = .string_utf8_third_to_last_byte; - continue :state_loop; - }, - 0xF4 => { - self.cursor += 1; - self.state = .string_utf8_third_to_last_byte_guard_against_too_large; - continue :state_loop; - }, - 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8. - } - } - if (self.is_end_of_input) return error.UnexpectedEndOfInput; - const slice = self.takeValueSlice(); - if (slice.len > 0) return Token{ .partial_string = slice }; - return error.BufferUnderrun; - }, - .string_backslash => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - '"', '\\', '/' => { - // Since these characters now represent themselves literally, - // we can simply begin the next plaintext slice here. 
- self.value_start = self.cursor; - self.cursor += 1; - self.state = .string; - continue :state_loop; - }, - 'b' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{0x08} }; - }, - 'f' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{0x0c} }; - }, - 'n' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{'\n'} }; - }, - 'r' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{'\r'} }; - }, - 't' => { - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - return Token{ .partial_string_escaped_1 = [_]u8{'\t'} }; - }, - 'u' => { - self.cursor += 1; - self.state = .string_backslash_u; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .string_backslash_u => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] = @as(u16, c - '0') << 12; - }, - 'A'...'F' => { - self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12; - }, - 'a'...'f' => { - self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.state = .string_backslash_u_1; - continue :state_loop; - }, - .string_backslash_u_1 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] |= @as(u16, c - '0') << 8; - }, - 'A'...'F' => { - self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8; - }, - 'a'...'f' => { - self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.state = 
.string_backslash_u_2; - continue :state_loop; - }, - .string_backslash_u_2 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] |= @as(u16, c - '0') << 4; - }, - 'A'...'F' => { - self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4; - }, - 'a'...'f' => { - self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.state = .string_backslash_u_3; - continue :state_loop; - }, - .string_backslash_u_3 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[0] |= c - '0'; - }, - 'A'...'F' => { - self.utf16_code_units[0] |= c - 'A' + 10; - }, - 'a'...'f' => { - self.utf16_code_units[0] |= c - 'a' + 10; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) { - self.state = .string_surrogate_half; - continue :state_loop; - } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) { - return error.SyntaxError; // Unexpected low surrogate half. - } else { - self.value_start = self.cursor; - self.state = .string; - return partialStringCodepoint(self.utf16_code_units[0]); - } - }, - .string_surrogate_half => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - '\\' => { - self.cursor += 1; - self.state = .string_surrogate_half_backslash; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. 
- } - }, - .string_surrogate_half_backslash => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 'u' => { - self.cursor += 1; - self.state = .string_surrogate_half_backslash_u; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. - } - }, - .string_surrogate_half_backslash_u => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 'D', 'd' => { - self.cursor += 1; - self.utf16_code_units[1] = 0xD << 12; - self.state = .string_surrogate_half_backslash_u_1; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. - } - }, - .string_surrogate_half_backslash_u_1 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - 'C'...'F' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8; - self.state = .string_surrogate_half_backslash_u_2; - continue :state_loop; - }, - 'c'...'f' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8; - self.state = .string_surrogate_half_backslash_u_2; - continue :state_loop; - }, - else => return error.SyntaxError, // Expected low surrogate half. 
- } - }, - .string_surrogate_half_backslash_u_2 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - '0') << 4; - self.state = .string_surrogate_half_backslash_u_3; - continue :state_loop; - }, - 'A'...'F' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4; - self.state = .string_surrogate_half_backslash_u_3; - continue :state_loop; - }, - 'a'...'f' => { - self.cursor += 1; - self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4; - self.state = .string_surrogate_half_backslash_u_3; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .string_surrogate_half_backslash_u_3 => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - const c = self.input[self.cursor]; - switch (c) { - '0'...'9' => { - self.utf16_code_units[1] |= c - '0'; - }, - 'A'...'F' => { - self.utf16_code_units[1] |= c - 'A' + 10; - }, - 'a'...'f' => { - self.utf16_code_units[1] |= c - 'a' + 10; - }, - else => return error.SyntaxError, - } - self.cursor += 1; - self.value_start = self.cursor; - self.state = .string; - const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable; - return partialStringCodepoint(code_point); - }, - - .string_utf8_last_byte => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0xBF => { - self.cursor += 1; - self.state = .string; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_second_to_last_byte => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0xBF => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. 
- } - }, - .string_utf8_second_to_last_byte_guard_against_overlong => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0xA0...0xBF => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_second_to_last_byte_guard_against_surrogate_half => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0x9F => { - self.cursor += 1; - self.state = .string_utf8_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_third_to_last_byte => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0xBF => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_third_to_last_byte_guard_against_overlong => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x90...0xBF => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. - } - }, - .string_utf8_third_to_last_byte_guard_against_too_large => { - if (self.cursor >= self.input.len) return self.endOfBufferInString(); - switch (self.input[self.cursor]) { - 0x80...0x8F => { - self.cursor += 1; - self.state = .string_utf8_second_to_last_byte; - continue :state_loop; - }, - else => return error.SyntaxError, // Invalid UTF-8. 
- } - }, - - .literal_t => { - switch (try self.expectByte()) { - 'r' => { - self.cursor += 1; - self.state = .literal_tr; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_tr => { - switch (try self.expectByte()) { - 'u' => { - self.cursor += 1; - self.state = .literal_tru; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_tru => { - switch (try self.expectByte()) { - 'e' => { - self.cursor += 1; - self.state = .post_value; - return .true; - }, - else => return error.SyntaxError, - } - }, - .literal_f => { - switch (try self.expectByte()) { - 'a' => { - self.cursor += 1; - self.state = .literal_fa; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_fa => { - switch (try self.expectByte()) { - 'l' => { - self.cursor += 1; - self.state = .literal_fal; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_fal => { - switch (try self.expectByte()) { - 's' => { - self.cursor += 1; - self.state = .literal_fals; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_fals => { - switch (try self.expectByte()) { - 'e' => { - self.cursor += 1; - self.state = .post_value; - return .false; - }, - else => return error.SyntaxError, - } - }, - .literal_n => { - switch (try self.expectByte()) { - 'u' => { - self.cursor += 1; - self.state = .literal_nu; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_nu => { - switch (try self.expectByte()) { - 'l' => { - self.cursor += 1; - self.state = .literal_nul; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - .literal_nul => { - switch (try self.expectByte()) { - 'l' => { - self.cursor += 1; - self.state = .post_value; - return .null; - }, - else => return error.SyntaxError, - } - }, - } - unreachable; - } - } - - /// Seeks ahead in the input until the first byte of the next token (or the end of the input) - /// 
determines which type of token will be returned from the next `next*()` call. - /// This function is idempotent, only advancing past commas, colons, and inter-token whitespace. - pub fn peekNextTokenType(self: *@This()) PeekError!TokenType { - state_loop: while (true) { - switch (self.state) { - .value => { - switch (try self.skipWhitespaceExpectByte()) { - '{' => return .object_begin, - '[' => return .array_begin, - '"' => return .string, - '-', '0'...'9' => return .number, - 't' => return .true, - 'f' => return .false, - 'n' => return .null, - else => return error.SyntaxError, - } - }, - - .post_value => { - if (try self.skipWhitespaceCheckEnd()) return .end_of_document; - - const c = self.input[self.cursor]; - if (self.string_is_object_key) { - self.string_is_object_key = false; - switch (c) { - ':' => { - self.cursor += 1; - self.state = .value; - continue :state_loop; - }, - else => return error.SyntaxError, - } - } - - switch (c) { - '}' => return .object_end, - ']' => return .array_end, - ',' => { - switch (self.stack.peek()) { - OBJECT_MODE => { - self.state = .object_post_comma; - }, - ARRAY_MODE => { - self.state = .value; - }, - } - self.cursor += 1; - continue :state_loop; - }, - else => return error.SyntaxError, - } - }, - - .object_start => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => return .string, - '}' => return .object_end, - else => return error.SyntaxError, - } - }, - .object_post_comma => { - switch (try self.skipWhitespaceExpectByte()) { - '"' => return .string, - else => return error.SyntaxError, - } - }, - - .array_start => { - switch (try self.skipWhitespaceExpectByte()) { - ']' => return .array_end, - else => { - self.state = .value; - continue :state_loop; - }, - } - }, - - .number_minus, - .number_leading_zero, - .number_int, - .number_post_dot, - .number_frac, - .number_post_e, - .number_post_e_sign, - .number_exp, - => return .number, - - .string, - .string_backslash, - .string_backslash_u, - .string_backslash_u_1, - 
.string_backslash_u_2, - .string_backslash_u_3, - .string_surrogate_half, - .string_surrogate_half_backslash, - .string_surrogate_half_backslash_u, - .string_surrogate_half_backslash_u_1, - .string_surrogate_half_backslash_u_2, - .string_surrogate_half_backslash_u_3, - => return .string, - - .string_utf8_last_byte, - .string_utf8_second_to_last_byte, - .string_utf8_second_to_last_byte_guard_against_overlong, - .string_utf8_second_to_last_byte_guard_against_surrogate_half, - .string_utf8_third_to_last_byte, - .string_utf8_third_to_last_byte_guard_against_overlong, - .string_utf8_third_to_last_byte_guard_against_too_large, - => return .string, - - .literal_t, - .literal_tr, - .literal_tru, - => return .true, - .literal_f, - .literal_fa, - .literal_fal, - .literal_fals, - => return .false, - .literal_n, - .literal_nu, - .literal_nul, - => return .null, - } - unreachable; - } - } - - const State = enum { - value, - post_value, - - object_start, - object_post_comma, - - array_start, - - number_minus, - number_leading_zero, - number_int, - number_post_dot, - number_frac, - number_post_e, - number_post_e_sign, - number_exp, - - string, - string_backslash, - string_backslash_u, - string_backslash_u_1, - string_backslash_u_2, - string_backslash_u_3, - string_surrogate_half, - string_surrogate_half_backslash, - string_surrogate_half_backslash_u, - string_surrogate_half_backslash_u_1, - string_surrogate_half_backslash_u_2, - string_surrogate_half_backslash_u_3, - - // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String - string_utf8_last_byte, // State A - string_utf8_second_to_last_byte, // State B - string_utf8_second_to_last_byte_guard_against_overlong, // State C - string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D - string_utf8_third_to_last_byte, // State E - string_utf8_third_to_last_byte_guard_against_overlong, // State F - string_utf8_third_to_last_byte_guard_against_too_large, // State G - 
- literal_t, - literal_tr, - literal_tru, - literal_f, - literal_fa, - literal_fal, - literal_fals, - literal_n, - literal_nu, - literal_nul, - }; - - fn expectByte(self: *const @This()) !u8 { - if (self.cursor < self.input.len) { - return self.input[self.cursor]; - } - // No byte. - if (self.is_end_of_input) return error.UnexpectedEndOfInput; - return error.BufferUnderrun; - } - - fn skipWhitespace(self: *@This()) void { - while (self.cursor < self.input.len) : (self.cursor += 1) { - switch (self.input[self.cursor]) { - // Whitespace - ' ', '\t', '\r' => continue, - '\n' => { - if (self.diagnostics) |diag| { - diag.line_number += 1; - // This will count the newline itself, - // which means a straight-forward subtraction will give a 1-based column number. - diag.line_start_cursor = self.cursor; - } - continue; - }, - else => return, - } - } - } - - fn skipWhitespaceExpectByte(self: *@This()) !u8 { - self.skipWhitespace(); - return self.expectByte(); - } - - fn skipWhitespaceCheckEnd(self: *@This()) !bool { - self.skipWhitespace(); - if (self.cursor >= self.input.len) { - // End of buffer. - if (self.is_end_of_input) { - // End of everything. - if (self.stackHeight() == 0) { - // We did it! - return true; - } - return error.UnexpectedEndOfInput; - } - return error.BufferUnderrun; - } - if (self.stackHeight() == 0) return error.SyntaxError; - return false; - } - - fn takeValueSlice(self: *@This()) []const u8 { - const slice = self.input[self.value_start..self.cursor]; - self.value_start = self.cursor; - return slice; - } - fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 { - // Check if the escape sequence started before the current input buffer. - // (The algebra here is awkward to avoid unsigned underflow, - // but it's just making sure the slice on the next line isn't UB.) - if (self.cursor <= self.value_start + trailing_negative_offset) return ""; - const slice = self.input[self.value_start .. 
self.cursor - trailing_negative_offset]; - // When trailing_negative_offset is non-zero, setting self.value_start doesn't matter, - // because we always set it again while emitting the .partial_string_escaped_*. - self.value_start = self.cursor; - return slice; - } - - fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token { - const slice = self.takeValueSlice(); - if (self.is_end_of_input) { - if (!allow_end) return error.UnexpectedEndOfInput; - self.state = .post_value; - return Token{ .number = slice }; - } - if (slice.len == 0) return error.BufferUnderrun; - return Token{ .partial_number = slice }; - } - - fn endOfBufferInString(self: *@This()) !Token { - if (self.is_end_of_input) return error.UnexpectedEndOfInput; - const slice = self.takeValueSliceMinusTrailingOffset(switch (self.state) { - // Don't include the escape sequence in the partial string. - .string_backslash => 1, - .string_backslash_u => 2, - .string_backslash_u_1 => 3, - .string_backslash_u_2 => 4, - .string_backslash_u_3 => 5, - .string_surrogate_half => 6, - .string_surrogate_half_backslash => 7, - .string_surrogate_half_backslash_u => 8, - .string_surrogate_half_backslash_u_1 => 9, - .string_surrogate_half_backslash_u_2 => 10, - .string_surrogate_half_backslash_u_3 => 11, - - // Include everything up to the cursor otherwise. 
- .string, - .string_utf8_last_byte, - .string_utf8_second_to_last_byte, - .string_utf8_second_to_last_byte_guard_against_overlong, - .string_utf8_second_to_last_byte_guard_against_surrogate_half, - .string_utf8_third_to_last_byte, - .string_utf8_third_to_last_byte_guard_against_overlong, - .string_utf8_third_to_last_byte_guard_against_too_large, - => 0, - - else => unreachable, - }); - if (slice.len == 0) return error.BufferUnderrun; - return Token{ .partial_string = slice }; - } - - fn partialStringCodepoint(code_point: u21) Token { - var buf: [4]u8 = undefined; - switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) { - 1 => return Token{ .partial_string_escaped_1 = buf[0..1].* }, - 2 => return Token{ .partial_string_escaped_2 = buf[0..2].* }, - 3 => return Token{ .partial_string_escaped_3 = buf[0..3].* }, - 4 => return Token{ .partial_string_escaped_4 = buf[0..4].* }, - else => unreachable, - } - } -}; - -const OBJECT_MODE = 0; -const ARRAY_MODE = 1; - -fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void { - const new_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong; - if (new_len > max_value_len) return error.ValueTooLong; - try list.appendSlice(buf); -} - -/// For the slice you get from a `Token.number` or `Token.allocated_number`, -/// this function returns true if the number doesn't contain any fraction or exponent components, and is not `-0`. -/// Note, the numeric value encoded by the value may still be an integer, such as `1.0`. -/// This function is meant to give a hint about whether integer parsing or float parsing should be used on the value. -/// This function will not give meaningful results on non-numeric input. 
-pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool { - if (std.mem.eql(u8, value, "-0")) return false; - return std.mem.indexOfAny(u8, value, ".eE") == null; -} - -test { - _ = @import("./scanner_test.zig"); -} diff --git a/lib/std/json/scanner_test.zig b/lib/std/json/scanner_test.zig index ec6c18edab..eb5d5cb75e 100644 --- a/lib/std/json/scanner_test.zig +++ b/lib/std/json/scanner_test.zig @@ -1,13 +1,11 @@ const std = @import("std"); -const JsonScanner = @import("./scanner.zig").Scanner; -const jsonReader = @import("./scanner.zig").reader; -const JsonReader = @import("./scanner.zig").Reader; -const Token = @import("./scanner.zig").Token; -const TokenType = @import("./scanner.zig").TokenType; -const Diagnostics = @import("./scanner.zig").Diagnostics; -const Error = @import("./scanner.zig").Error; -const validate = @import("./scanner.zig").validate; -const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; +const Scanner = @import("Scanner.zig"); +const Token = Scanner.Token; +const TokenType = Scanner.TokenType; +const Diagnostics = Scanner.Diagnostics; +const Error = Scanner.Error; +const validate = Scanner.validate; +const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger; const example_document_str = \\{ @@ -36,7 +34,7 @@ fn expectPeekNext(scanner_or_reader: anytype, expected_token_type: TokenType, ex } test "token" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str); + var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str); defer scanner.deinit(); try expectNext(&scanner, .object_begin); @@ -138,23 +136,25 @@ fn testAllTypes(source: anytype, large_buffer: bool) !void { } test "peek all types" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, all_types_test_case); + var scanner = Scanner.initCompleteInput(std.testing.allocator, all_types_test_case); defer scanner.deinit(); try testAllTypes(&scanner, 
true); - var stream: std.io.FixedBufferStream = .{ .buffer = all_types_test_case }; - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(all_types_test_case); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); try testAllTypes(&json_reader, true); - var tiny_stream: std.io.FixedBufferStream = .{ .buffer = all_types_test_case }; - var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + var tiny_buffer: [1]u8 = undefined; + var tiny_stream: std.testing.Reader = .init(&tiny_buffer, &.{.{ .buffer = all_types_test_case }}); + tiny_stream.artificial_limit = .limited(1); + var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream.interface); defer tiny_json_reader.deinit(); try testAllTypes(&tiny_json_reader, false); } test "token mismatched close" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }"); + var scanner = Scanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }"); defer scanner.deinit(); try expectNext(&scanner, .array_begin); try expectNext(&scanner, Token{ .number = "102" }); @@ -164,15 +164,15 @@ test "token mismatched close" { } test "token premature object close" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "{ \"key\": }"); + var scanner = Scanner.initCompleteInput(std.testing.allocator, "{ \"key\": }"); defer scanner.deinit(); try expectNext(&scanner, .object_begin); try expectNext(&scanner, Token{ .string = "key" }); try std.testing.expectError(error.SyntaxError, scanner.next()); } -test "JsonScanner basic" { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str); +test "Scanner basic" { + var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str); defer scanner.deinit(); while (true) { @@ -181,10 +181,10 @@ test "JsonScanner 
basic" { } } -test "JsonReader basic" { - var stream: std.io.FixedBufferStream = .{ .buffer = example_document_str }; +test "Scanner.Reader basic" { + var stream: std.Io.Reader = .fixed(example_document_str); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); while (true) { @@ -215,7 +215,7 @@ const number_test_items = blk: { test "numbers" { for (number_test_items) |number_str| { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, number_str); + var scanner = Scanner.initCompleteInput(std.testing.allocator, number_str); defer scanner.deinit(); const token = try scanner.next(); @@ -243,10 +243,10 @@ const string_test_cases = .{ test "strings" { inline for (string_test_cases) |tuple| { - var stream: std.io.FixedBufferStream = .{ .buffer = "\"" ++ tuple[0] ++ "\"" }; + var stream: std.Io.Reader = .fixed("\"" ++ tuple[0] ++ "\""); var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); const token = try json_reader.nextAlloc(arena.allocator(), .alloc_if_needed); @@ -289,7 +289,7 @@ test "nesting" { } fn expectMaybeError(document_str: []const u8, maybe_error: ?Error) !void { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, document_str); + var scanner = Scanner.initCompleteInput(std.testing.allocator, document_str); defer scanner.deinit(); while (true) { @@ -352,12 +352,12 @@ fn expectEqualTokens(expected_token: Token, actual_token: Token) !void { } fn testTinyBufferSize(document_str: []const u8) !void { - var tiny_stream: std.io.FixedBufferStream = .{ .buffer = document_str }; - var normal_stream: std.io.FixedBufferStream = .{ .buffer = document_str }; + var tiny_stream: std.Io.Reader = .fixed(document_str); 
+ var normal_stream: std.Io.Reader = .fixed(document_str); - var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream); defer tiny_json_reader.deinit(); - var normal_json_reader = JsonReader(0x1000, @TypeOf(normal_stream.reader())).init(std.testing.allocator, normal_stream.reader()); + var normal_json_reader: Scanner.Reader = .init(std.testing.allocator, &normal_stream); defer normal_json_reader.deinit(); expectEqualStreamOfTokens(&normal_json_reader, &tiny_json_reader) catch |err| { @@ -397,13 +397,13 @@ test "validate" { } fn testSkipValue(s: []const u8) !void { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s); + var scanner = Scanner.initCompleteInput(std.testing.allocator, s); defer scanner.deinit(); try scanner.skipValue(); try expectEqualTokens(.end_of_document, try scanner.next()); - var stream: std.io.FixedBufferStream = .{ .buffer = s }; - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(s); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); try json_reader.skipValue(); try expectEqualTokens(.end_of_document, try json_reader.next()); @@ -441,7 +441,7 @@ fn testEnsureStackCapacity(do_ensure: bool) !void { try input_string.appendNTimes(std.testing.allocator, ']', nestings); defer input_string.deinit(std.testing.allocator); - var scanner = JsonScanner.initCompleteInput(failing_allocator, input_string.items); + var scanner = Scanner.initCompleteInput(failing_allocator, input_string.items); defer scanner.deinit(); if (do_ensure) { @@ -473,17 +473,17 @@ fn testDiagnosticsFromSource(expected_error: ?anyerror, line: u64, col: u64, byt try std.testing.expectEqual(byte_offset, diagnostics.getByteOffset()); } fn testDiagnostics(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, 
s: []const u8) !void { - var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s); + var scanner = Scanner.initCompleteInput(std.testing.allocator, s); defer scanner.deinit(); try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &scanner); - var tiny_stream: std.io.FixedBufferStream = .{ .buffer = s }; - var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + var tiny_stream: std.Io.Reader = .fixed(s); + var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream); defer tiny_json_reader.deinit(); try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &tiny_json_reader); - var medium_stream: std.io.FixedBufferStream = .{ .buffer = s }; - var medium_json_reader = JsonReader(5, @TypeOf(medium_stream.reader())).init(std.testing.allocator, medium_stream.reader()); + var medium_stream: std.Io.Reader = .fixed(s); + var medium_json_reader: Scanner.Reader = .init(std.testing.allocator, &medium_stream); defer medium_json_reader.deinit(); try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &medium_json_reader); } diff --git a/lib/std/json/static.zig b/lib/std/json/static.zig index 2504d59100..44469adf4c 100644 --- a/lib/std/json/static.zig +++ b/lib/std/json/static.zig @@ -4,11 +4,11 @@ const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; const ArrayList = std.ArrayList; -const Scanner = @import("./scanner.zig").Scanner; -const Token = @import("./scanner.zig").Token; -const AllocWhen = @import("./scanner.zig").AllocWhen; -const default_max_value_len = @import("./scanner.zig").default_max_value_len; -const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; +const Scanner = @import("Scanner.zig"); +const Token = Scanner.Token; +const AllocWhen = Scanner.AllocWhen; +const default_max_value_len = Scanner.default_max_value_len; +const 
isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger; const Value = @import("./dynamic.zig").Value; const Array = @import("./dynamic.zig").Array; diff --git a/lib/std/json/static_test.zig b/lib/std/json/static_test.zig index 38edbf7f0a..735ccd82e1 100644 --- a/lib/std/json/static_test.zig +++ b/lib/std/json/static_test.zig @@ -12,9 +12,7 @@ const parseFromValue = @import("./static.zig").parseFromValue; const parseFromValueLeaky = @import("./static.zig").parseFromValueLeaky; const ParseOptions = @import("./static.zig").ParseOptions; -const JsonScanner = @import("./scanner.zig").Scanner; -const jsonReader = @import("./scanner.zig").reader; -const Diagnostics = @import("./scanner.zig").Diagnostics; +const Scanner = @import("Scanner.zig"); const Value = @import("./dynamic.zig").Value; @@ -300,9 +298,9 @@ const subnamespaces_0_doc = fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void { // First do the one with the debug info in case we get a SyntaxError or something. 
{ - var scanner = JsonScanner.initCompleteInput(testing.allocator, doc); + var scanner = Scanner.initCompleteInput(testing.allocator, doc); defer scanner.deinit(); - var diagnostics = Diagnostics{}; + var diagnostics = Scanner.Diagnostics{}; scanner.enableDiagnostics(&diagnostics); var parsed = parseFromTokenSource(T, testing.allocator, &scanner, .{}) catch |e| { std.debug.print("at line,col: {}:{}\n", .{ diagnostics.getLine(), diagnostics.getColumn() }); @@ -317,8 +315,8 @@ fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void { try testing.expectEqualDeep(expected, parsed.value); } { - var stream: std.io.FixedBufferStream = .{ .buffer = doc }; - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(doc); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); var parsed = try parseFromTokenSource(T, testing.allocator, &json_reader, .{}); defer parsed.deinit(); @@ -331,13 +329,13 @@ fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void { try testing.expectEqualDeep(expected, try parseFromSliceLeaky(T, arena.allocator(), doc, .{})); } { - var scanner = JsonScanner.initCompleteInput(testing.allocator, doc); + var scanner = Scanner.initCompleteInput(testing.allocator, doc); defer scanner.deinit(); try testing.expectEqualDeep(expected, try parseFromTokenSourceLeaky(T, arena.allocator(), &scanner, .{})); } { - var stream: std.io.FixedBufferStream = .{ .buffer = doc }; - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(doc); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); try testing.expectEqualDeep(expected, try parseFromTokenSourceLeaky(T, arena.allocator(), &json_reader, .{})); } @@ -763,7 +761,7 @@ test "parse exponential into int" { test "parseFromTokenSource" { { - var scanner = 
JsonScanner.initCompleteInput(testing.allocator, "123"); + var scanner = Scanner.initCompleteInput(testing.allocator, "123"); defer scanner.deinit(); var parsed = try parseFromTokenSource(u32, testing.allocator, &scanner, .{}); defer parsed.deinit(); @@ -771,8 +769,8 @@ test "parseFromTokenSource" { } { - var stream: std.io.FixedBufferStream = .{ .buffer = "123" }; - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed("123"); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); defer json_reader.deinit(); var parsed = try parseFromTokenSource(u32, testing.allocator, &json_reader, .{}); defer parsed.deinit(); @@ -836,7 +834,7 @@ test "json parse partial" { \\} ; const allocator = testing.allocator; - var scanner = JsonScanner.initCompleteInput(allocator, str); + var scanner = Scanner.initCompleteInput(allocator, str); defer scanner.deinit(); var arena = ArenaAllocator.init(allocator); @@ -886,8 +884,8 @@ test "json parse allocate when streaming" { var arena = ArenaAllocator.init(allocator); defer arena.deinit(); - var stream: std.io.FixedBufferStream = .{ .buffer = str }; - var json_reader = jsonReader(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(str); + var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream); const parsed = parseFromTokenSourceLeaky(T, arena.allocator(), &json_reader, .{}) catch |err| { json_reader.deinit(); diff --git a/lib/std/json/test.zig b/lib/std/json/test.zig index 6d1886f9ff..d3d803e939 100644 --- a/lib/std/json/test.zig +++ b/lib/std/json/test.zig @@ -2,8 +2,7 @@ const std = @import("std"); const json = std.json; const testing = std.testing; const parseFromSlice = @import("./static.zig").parseFromSlice; -const validate = @import("./scanner.zig").validate; -const JsonScanner = @import("./scanner.zig").Scanner; +const Scanner = @import("./Scanner.zig"); const Value = @import("./dynamic.zig").Value; // Support for 
JSONTestSuite.zig @@ -20,7 +19,7 @@ pub fn any(s: []const u8) !void { testHighLevelDynamicParser(s) catch {}; } fn testLowLevelScanner(s: []const u8) !void { - var scanner = JsonScanner.initCompleteInput(testing.allocator, s); + var scanner = Scanner.initCompleteInput(testing.allocator, s); defer scanner.deinit(); while (true) { const token = try scanner.next(); @@ -47,7 +46,7 @@ test "n_object_closed_missing_value" { } fn roundTrip(s: []const u8) !void { - try testing.expect(try validate(testing.allocator, s)); + try testing.expect(try Scanner.validate(testing.allocator, s)); var parsed = try parseFromSlice(Value, testing.allocator, s, .{}); defer parsed.deinit(); diff --git a/lib/std/zig.zig b/lib/std/zig.zig index ad264a9b33..486947768d 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -446,8 +446,8 @@ pub fn fmtString(bytes: []const u8) std.fmt.Formatter([]const u8, stringEscape) } /// Return a formatter for escaping a single quoted Zig string. -pub fn fmtChar(bytes: []const u8) std.fmt.Formatter([]const u8, charEscape) { - return .{ .data = bytes }; +pub fn fmtChar(c: u21) std.fmt.Formatter(u21, charEscape) { + return .{ .data = c }; } test fmtString { @@ -458,9 +458,7 @@ test fmtString { } test fmtChar { - try std.testing.expectFmt( - \\" \\ hi \x07 \x11 " derp \'" - , "\"{f}\"", .{fmtChar(" \\ hi \x07 \x11 \" derp '")}); + try std.testing.expectFmt("c \\u{26a1}", "{f} {f}", .{ fmtChar('c'), fmtChar('⚡') }); } /// Print the string as escaped contents of a double quoted string. @@ -480,21 +478,26 @@ pub fn stringEscape(bytes: []const u8, w: *Writer) Writer.Error!void { }; } -/// Print the string as escaped contents of a single-quoted string. -pub fn charEscape(bytes: []const u8, w: *Writer) Writer.Error!void { - for (bytes) |byte| switch (byte) { +/// Print as escaped contents of a single-quoted string. 
+pub fn charEscape(codepoint: u21, w: *Writer) Writer.Error!void { + switch (codepoint) { '\n' => try w.writeAll("\\n"), '\r' => try w.writeAll("\\r"), '\t' => try w.writeAll("\\t"), '\\' => try w.writeAll("\\\\"), - '"' => try w.writeByte('"'), '\'' => try w.writeAll("\\'"), - ' ', '!', '#'...'&', '('...'[', ']'...'~' => try w.writeByte(byte), + '"', ' ', '!', '#'...'&', '('...'[', ']'...'~' => try w.writeByte(@intCast(codepoint)), else => { - try w.writeAll("\\x"); - try w.printInt(byte, 16, .lower, .{ .width = 2, .fill = '0' }); + if (std.math.cast(u8, codepoint)) |byte| { + try w.writeAll("\\x"); + try w.printInt(byte, 16, .lower, .{ .width = 2, .fill = '0' }); + } else { + try w.writeAll("\\u{"); + try w.printInt(codepoint, 16, .lower, .{}); + try w.writeByte('}'); + } }, - }; + } } pub fn isValidId(bytes: []const u8) bool { diff --git a/lib/std/zig/Ast.zig b/lib/std/zig/Ast.zig index 1f36c0fdbf..c15693fd62 100644 --- a/lib/std/zig/Ast.zig +++ b/lib/std/zig/Ast.zig @@ -574,7 +574,7 @@ pub fn renderError(tree: Ast, parse_error: Error, w: *Writer) Writer.Error!void '/' => "comment", else => unreachable, }, - std.zig.fmtChar(tok_slice[parse_error.extra.offset..][0..1]), + std.zig.fmtChar(tok_slice[parse_error.extra.offset]), }); }, diff --git a/lib/std/zig/Server.zig b/lib/std/zig/Server.zig index 8fc016d284..e932ed9a9d 100644 --- a/lib/std/zig/Server.zig +++ b/lib/std/zig/Server.zig @@ -203,8 +203,8 @@ pub const TestMetadata = struct { pub fn serveTestMetadata(s: *Server, test_metadata: TestMetadata) !void { const header: OutMessage.TestMetadata = .{ - .tests_len = @as(u32, @intCast(test_metadata.names.len)), - .string_bytes_len = @as(u32, @intCast(test_metadata.string_bytes.len)), + .tests_len = @intCast(test_metadata.names.len), + .string_bytes_len = @intCast(test_metadata.string_bytes.len), }; const trailing = 2; const bytes_len = @sizeOf(OutMessage.TestMetadata) + diff --git a/lib/std/zig/llvm/BitcodeReader.zig b/lib/std/zig/llvm/BitcodeReader.zig index 
f1d47e93c0..f691f12a8c 100644 --- a/lib/std/zig/llvm/BitcodeReader.zig +++ b/lib/std/zig/llvm/BitcodeReader.zig @@ -177,7 +177,7 @@ pub fn next(bc: *BitcodeReader) !?Item { pub fn skipBlock(bc: *BitcodeReader, block: Block) !void { assert(bc.bit_offset == 0); - try bc.reader.discardAll(4 * @as(u34, block.len)); + try bc.reader.discardAll(4 * @as(usize, block.len)); try bc.endBlock(); } diff --git a/lib/std/zon.zig b/lib/std/zon.zig index 252331057a..9ac02cf741 100644 --- a/lib/std/zon.zig +++ b/lib/std/zon.zig @@ -38,6 +38,7 @@ pub const parse = @import("zon/parse.zig"); pub const stringify = @import("zon/stringify.zig"); +pub const Serializer = @import("zon/Serializer.zig"); test { _ = parse; diff --git a/lib/std/zon/Serializer.zig b/lib/std/zon/Serializer.zig new file mode 100644 index 0000000000..b65b13bf97 --- /dev/null +++ b/lib/std/zon/Serializer.zig @@ -0,0 +1,929 @@ +//! Lower level control over serialization, you can create a new instance with `serializer`. +//! +//! Useful when you want control over which fields are serialized, how they're represented, +//! or want to write a ZON object that does not exist in memory. +//! +//! You can serialize values with `value`. To serialize recursive types, the following are provided: +//! * `valueMaxDepth` +//! * `valueArbitraryDepth` +//! +//! You can also serialize values using specific notations: +//! * `int` +//! * `float` +//! * `codePoint` +//! * `tuple` +//! * `tupleMaxDepth` +//! * `tupleArbitraryDepth` +//! * `string` +//! * `multilineString` +//! +//! For manual serialization of containers, see: +//! * `beginStruct` +//! 
* `beginTuple` + +options: Options = .{}, +indent_level: u8 = 0, +writer: *Writer, + +const Serializer = @This(); +const std = @import("std"); +const assert = std.debug.assert; +const Writer = std.Io.Writer; + +pub const Error = Writer.Error; +pub const DepthError = Error || error{ExceededMaxDepth}; + +pub const Options = struct { + /// If false, only syntactically necessary whitespace is emitted. + whitespace: bool = true, +}; + +/// Options for manual serialization of container types. +pub const ContainerOptions = struct { + /// The whitespace style that should be used for this container. Ignored if whitespace is off. + whitespace_style: union(enum) { + /// If true, wrap every field. If false do not. + wrap: bool, + /// Automatically decide whether to wrap or not based on the number of fields. Following + /// the standard rule of thumb, containers with more than two fields are wrapped. + fields: usize, + } = .{ .wrap = true }, + + fn shouldWrap(self: ContainerOptions) bool { + return switch (self.whitespace_style) { + .wrap => |wrap| wrap, + .fields => |fields| fields > 2, + }; + } +}; + +/// Options for serialization of an individual value. +/// +/// See `SerializeOptions` for more information on these options. +pub const ValueOptions = struct { + emit_codepoint_literals: EmitCodepointLiterals = .never, + emit_strings_as_containers: bool = false, + emit_default_optional_fields: bool = true, +}; + +/// Determines when to emit Unicode code point literals as opposed to integer literals. +pub const EmitCodepointLiterals = enum { + /// Never emit Unicode code point literals. + never, + /// Emit Unicode code point literals for any `u8` in the printable ASCII range. + printable_ascii, + /// Emit Unicode code point literals for any unsigned integer with 21 bits or fewer + /// whose value is a valid non-surrogate code point. + always, + + /// If the value should be emitted as a Unicode codepoint, return it as a u21. 
+ fn emitAsCodepoint(self: @This(), val: anytype) ?u21 { + // Rule out incompatible integer types + switch (@typeInfo(@TypeOf(val))) { + .int => |int_info| if (int_info.signedness == .signed or int_info.bits > 21) { + return null; + }, + .comptime_int => {}, + else => comptime unreachable, + } + + // Return null if the value shouldn't be printed as a Unicode codepoint, or the value casted + // to a u21 if it should. + switch (self) { + .always => { + const c = std.math.cast(u21, val) orelse return null; + if (!std.unicode.utf8ValidCodepoint(c)) return null; + return c; + }, + .printable_ascii => { + const c = std.math.cast(u8, val) orelse return null; + if (!std.ascii.isPrint(c)) return null; + return c; + }, + .never => { + return null; + }, + } + } +}; + +/// Serialize a value, similar to `serialize`. +pub fn value(self: *Serializer, val: anytype, options: ValueOptions) Error!void { + comptime assert(!typeIsRecursive(@TypeOf(val))); + return self.valueArbitraryDepth(val, options); +} + +/// Serialize a value, similar to `serializeMaxDepth`. +/// Can return `error.ExceededMaxDepth`. +pub fn valueMaxDepth(self: *Serializer, val: anytype, options: ValueOptions, depth: usize) DepthError!void { + try checkValueDepth(val, depth); + return self.valueArbitraryDepth(val, options); +} + +/// Serialize a value, similar to `serializeArbitraryDepth`. 
+pub fn valueArbitraryDepth(self: *Serializer, val: anytype, options: ValueOptions) Error!void { + comptime assert(canSerializeType(@TypeOf(val))); + switch (@typeInfo(@TypeOf(val))) { + .int, .comptime_int => if (options.emit_codepoint_literals.emitAsCodepoint(val)) |c| { + self.codePoint(c) catch |err| switch (err) { + error.InvalidCodepoint => unreachable, // Already validated + else => |e| return e, + }; + } else { + try self.int(val); + }, + .float, .comptime_float => try self.float(val), + .bool, .null => try self.writer.print("{}", .{val}), + .enum_literal => try self.ident(@tagName(val)), + .@"enum" => try self.ident(@tagName(val)), + .pointer => |pointer| { + // Try to serialize as a string + const item: ?type = switch (@typeInfo(pointer.child)) { + .array => |array| array.child, + else => if (pointer.size == .slice) pointer.child else null, + }; + if (item == u8 and + (pointer.sentinel() == null or pointer.sentinel() == 0) and + !options.emit_strings_as_containers) + { + return try self.string(val); + } + + // Serialize as either a tuple or as the child type + switch (pointer.size) { + .slice => try self.tupleImpl(val, options), + .one => try self.valueArbitraryDepth(val.*, options), + else => comptime unreachable, + } + }, + .array => { + var container = try self.beginTuple( + .{ .whitespace_style = .{ .fields = val.len } }, + ); + for (val) |item_val| { + try container.fieldArbitraryDepth(item_val, options); + } + try container.end(); + }, + .@"struct" => |@"struct"| if (@"struct".is_tuple) { + var container = try self.beginTuple( + .{ .whitespace_style = .{ .fields = @"struct".fields.len } }, + ); + inline for (val) |field_value| { + try container.fieldArbitraryDepth(field_value, options); + } + try container.end(); + } else { + // Decide which fields to emit + const fields, const skipped: [@"struct".fields.len]bool = if (options.emit_default_optional_fields) b: { + break :b .{ @"struct".fields.len, @splat(false) }; + } else b: { + var fields = 
@"struct".fields.len; + var skipped: [@"struct".fields.len]bool = @splat(false); + inline for (@"struct".fields, &skipped) |field_info, *skip| { + if (field_info.default_value_ptr) |ptr| { + const default: *const field_info.type = @ptrCast(@alignCast(ptr)); + const field_value = @field(val, field_info.name); + if (std.meta.eql(field_value, default.*)) { + skip.* = true; + fields -= 1; + } + } + } + break :b .{ fields, skipped }; + }; + + // Emit those fields + var container = try self.beginStruct( + .{ .whitespace_style = .{ .fields = fields } }, + ); + inline for (@"struct".fields, skipped) |field_info, skip| { + if (!skip) { + try container.fieldArbitraryDepth( + field_info.name, + @field(val, field_info.name), + options, + ); + } + } + try container.end(); + }, + .@"union" => |@"union"| { + comptime assert(@"union".tag_type != null); + switch (val) { + inline else => |pl, tag| if (@TypeOf(pl) == void) + try self.writer.print(".{s}", .{@tagName(tag)}) + else { + var container = try self.beginStruct(.{ .whitespace_style = .{ .fields = 1 } }); + + try container.fieldArbitraryDepth( + @tagName(tag), + pl, + options, + ); + + try container.end(); + }, + } + }, + .optional => if (val) |inner| { + try self.valueArbitraryDepth(inner, options); + } else { + try self.writer.writeAll("null"); + }, + .vector => |vector| { + var container = try self.beginTuple( + .{ .whitespace_style = .{ .fields = vector.len } }, + ); + for (0..vector.len) |i| { + try container.fieldArbitraryDepth(val[i], options); + } + try container.end(); + }, + + else => comptime unreachable, + } +} + +/// Serialize an integer. +pub fn int(self: *Serializer, val: anytype) Error!void { + try self.writer.printInt(val, 10, .lower, .{}); +} + +/// Serialize a float. 
+pub fn float(self: *Serializer, val: anytype) Error!void { + switch (@typeInfo(@TypeOf(val))) { + .float => if (std.math.isNan(val)) { + return self.writer.writeAll("nan"); + } else if (std.math.isPositiveInf(val)) { + return self.writer.writeAll("inf"); + } else if (std.math.isNegativeInf(val)) { + return self.writer.writeAll("-inf"); + } else if (std.math.isNegativeZero(val)) { + return self.writer.writeAll("-0.0"); + } else { + try self.writer.print("{d}", .{val}); + }, + .comptime_float => if (val == 0) { + return self.writer.writeAll("0"); + } else { + try self.writer.print("{d}", .{val}); + }, + else => comptime unreachable, + } +} + +/// Serialize `name` as an identifier prefixed with `.`. +/// +/// Escapes the identifier if necessary. +pub fn ident(self: *Serializer, name: []const u8) Error!void { + try self.writer.print(".{f}", .{std.zig.fmtIdPU(name)}); +} + +pub const CodePointError = Error || error{InvalidCodepoint}; + +/// Serialize `val` as a Unicode codepoint. +/// +/// Returns `error.InvalidCodepoint` if `val` is not a valid Unicode codepoint. +pub fn codePoint(self: *Serializer, val: u21) CodePointError!void { + try self.writer.print("'{f}'", .{std.zig.fmtChar(val)}); +} + +/// Like `value`, but always serializes `val` as a tuple. +/// +/// Will fail at comptime if `val` is not a tuple, array, pointer to an array, or slice. +pub fn tuple(self: *Serializer, val: anytype, options: ValueOptions) Error!void { + comptime assert(!typeIsRecursive(@TypeOf(val))); + try self.tupleArbitraryDepth(val, options); +} + +/// Like `tuple`, but recursive types are allowed. +/// +/// Returns `error.ExceededMaxDepth` if `depth` is exceeded. +pub fn tupleMaxDepth( + self: *Serializer, + val: anytype, + options: ValueOptions, + depth: usize, +) DepthError!void { + try checkValueDepth(val, depth); + try self.tupleArbitraryDepth(val, options); +} + +/// Like `tuple`, but recursive types are allowed. 
+///
+/// It is the caller's responsibility to ensure that `val` does not contain cycles.
+pub fn tupleArbitraryDepth(
+    self: *Serializer,
+    val: anytype,
+    options: ValueOptions,
+) Error!void {
+    try self.tupleImpl(val, options);
+}
+
+fn tupleImpl(self: *Serializer, val: anytype, options: ValueOptions) Error!void {
+    comptime assert(canSerializeType(@TypeOf(val)));
+    switch (@typeInfo(@TypeOf(val))) {
+        .@"struct" => {
+            var container = try self.beginTuple(.{ .whitespace_style = .{ .fields = val.len } });
+            inline for (val) |item_val| {
+                try container.fieldArbitraryDepth(item_val, options);
+            }
+            try container.end();
+        },
+        .pointer, .array => {
+            var container = try self.beginTuple(.{ .whitespace_style = .{ .fields = val.len } });
+            for (val) |item_val| {
+                try container.fieldArbitraryDepth(item_val, options);
+            }
+            try container.end();
+        },
+        else => comptime unreachable,
+    }
+}
+
+/// Like `value`, but always serializes `val` as a string.
+pub fn string(self: *Serializer, val: []const u8) Error!void {
+    try self.writer.print("\"{f}\"", .{std.zig.fmtString(val)});
+}
+
+/// Options for formatting multiline strings.
+pub const MultilineStringOptions = struct {
+    /// If top level is true, whitespace before and after the multiline string is elided.
+    /// If it is false, a newline is printed, then the value, followed by a newline, and if
+    /// whitespace is true any necessary indentation follows.
+    top_level: bool = false,
+};
+
+pub const MultilineStringError = Error || error{InnerCarriageReturn};
+
+/// Like `value`, but always serializes to a multiline string literal.
+///
+/// Returns `error.InnerCarriageReturn` if `val` contains a CR not followed by a newline,
+/// since multiline strings cannot represent CR without a following newline.
+pub fn multilineString( + self: *Serializer, + val: []const u8, + options: MultilineStringOptions, +) MultilineStringError!void { + // Make sure the string does not contain any carriage returns not followed by a newline + var i: usize = 0; + while (i < val.len) : (i += 1) { + if (val[i] == '\r') { + if (i + 1 < val.len) { + if (val[i + 1] == '\n') { + i += 1; + continue; + } + } + return error.InnerCarriageReturn; + } + } + + if (!options.top_level) { + try self.newline(); + try self.indent(); + } + + try self.writer.writeAll("\\\\"); + for (val) |c| { + if (c != '\r') { + try self.writer.writeByte(c); // We write newlines here even if whitespace off + if (c == '\n') { + try self.indent(); + try self.writer.writeAll("\\\\"); + } + } + } + + if (!options.top_level) { + try self.writer.writeByte('\n'); // Even if whitespace off + try self.indent(); + } +} + +/// Create a `Struct` for writing ZON structs field by field. +pub fn beginStruct(self: *Serializer, options: ContainerOptions) Error!Struct { + return Struct.begin(self, options); +} + +/// Creates a `Tuple` for writing ZON tuples field by field. +pub fn beginTuple(self: *Serializer, options: ContainerOptions) Error!Tuple { + return Tuple.begin(self, options); +} + +fn indent(self: *Serializer) Error!void { + if (self.options.whitespace) { + try self.writer.splatByteAll(' ', 4 * self.indent_level); + } +} + +fn newline(self: *Serializer) Error!void { + if (self.options.whitespace) { + try self.writer.writeByte('\n'); + } +} + +fn newlineOrSpace(self: *Serializer, len: usize) Error!void { + if (self.containerShouldWrap(len)) { + try self.newline(); + } else { + try self.space(); + } +} + +fn space(self: *Serializer) Error!void { + if (self.options.whitespace) { + try self.writer.writeByte(' '); + } +} + +/// Writes ZON tuples field by field. 
+pub const Tuple = struct { + container: Container, + + fn begin(parent: *Serializer, options: ContainerOptions) Error!Tuple { + return .{ + .container = try Container.begin(parent, .anon, options), + }; + } + + /// Finishes serializing the tuple. + /// + /// Prints a trailing comma as configured when appropriate, and the closing bracket. + pub fn end(self: *Tuple) Error!void { + try self.container.end(); + self.* = undefined; + } + + /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `value`. + pub fn field( + self: *Tuple, + val: anytype, + options: ValueOptions, + ) Error!void { + try self.container.field(null, val, options); + } + + /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `valueMaxDepth`. + /// Returns `error.ExceededMaxDepth` if `depth` is exceeded. + pub fn fieldMaxDepth( + self: *Tuple, + val: anytype, + options: ValueOptions, + depth: usize, + ) DepthError!void { + try self.container.fieldMaxDepth(null, val, options, depth); + } + + /// Serialize a field. Equivalent to calling `fieldPrefix` followed by + /// `valueArbitraryDepth`. + pub fn fieldArbitraryDepth( + self: *Tuple, + val: anytype, + options: ValueOptions, + ) Error!void { + try self.container.fieldArbitraryDepth(null, val, options); + } + + /// Starts a field with a struct as a value. Returns the struct. + pub fn beginStructField( + self: *Tuple, + options: ContainerOptions, + ) Error!Struct { + try self.fieldPrefix(); + return self.container.serializer.beginStruct(options); + } + + /// Starts a field with a tuple as a value. Returns the tuple. + pub fn beginTupleField( + self: *Tuple, + options: ContainerOptions, + ) Error!Tuple { + try self.fieldPrefix(); + return self.container.serializer.beginTuple(options); + } + + /// Print a field prefix. This prints any necessary commas, and whitespace as + /// configured. Useful if you want to serialize the field value yourself. 
+ pub fn fieldPrefix(self: *Tuple) Error!void { + try self.container.fieldPrefix(null); + } +}; + +/// Writes ZON structs field by field. +pub const Struct = struct { + container: Container, + + fn begin(parent: *Serializer, options: ContainerOptions) Error!Struct { + return .{ + .container = try Container.begin(parent, .named, options), + }; + } + + /// Finishes serializing the struct. + /// + /// Prints a trailing comma as configured when appropriate, and the closing bracket. + pub fn end(self: *Struct) Error!void { + try self.container.end(); + self.* = undefined; + } + + /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `value`. + pub fn field( + self: *Struct, + name: []const u8, + val: anytype, + options: ValueOptions, + ) Error!void { + try self.container.field(name, val, options); + } + + /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `valueMaxDepth`. + /// Returns `error.ExceededMaxDepth` if `depth` is exceeded. + pub fn fieldMaxDepth( + self: *Struct, + name: []const u8, + val: anytype, + options: ValueOptions, + depth: usize, + ) DepthError!void { + try self.container.fieldMaxDepth(name, val, options, depth); + } + + /// Serialize a field. Equivalent to calling `fieldPrefix` followed by + /// `valueArbitraryDepth`. + pub fn fieldArbitraryDepth( + self: *Struct, + name: []const u8, + val: anytype, + options: ValueOptions, + ) Error!void { + try self.container.fieldArbitraryDepth(name, val, options); + } + + /// Starts a field with a struct as a value. Returns the struct. + pub fn beginStructField( + self: *Struct, + name: []const u8, + options: ContainerOptions, + ) Error!Struct { + try self.fieldPrefix(name); + return self.container.serializer.beginStruct(options); + } + + /// Starts a field with a tuple as a value. Returns the tuple. 
+ pub fn beginTupleField( + self: *Struct, + name: []const u8, + options: ContainerOptions, + ) Error!Tuple { + try self.fieldPrefix(name); + return self.container.serializer.beginTuple(options); + } + + /// Print a field prefix. This prints any necessary commas, the field name (escaped if + /// necessary) and whitespace as configured. Useful if you want to serialize the field + /// value yourself. + pub fn fieldPrefix(self: *Struct, name: []const u8) Error!void { + try self.container.fieldPrefix(name); + } +}; + +const Container = struct { + const FieldStyle = enum { named, anon }; + + serializer: *Serializer, + field_style: FieldStyle, + options: ContainerOptions, + empty: bool, + + fn begin( + sz: *Serializer, + field_style: FieldStyle, + options: ContainerOptions, + ) Error!Container { + if (options.shouldWrap()) sz.indent_level +|= 1; + try sz.writer.writeAll(".{"); + return .{ + .serializer = sz, + .field_style = field_style, + .options = options, + .empty = true, + }; + } + + fn end(self: *Container) Error!void { + if (self.options.shouldWrap()) self.serializer.indent_level -|= 1; + if (!self.empty) { + if (self.options.shouldWrap()) { + if (self.serializer.options.whitespace) { + try self.serializer.writer.writeByte(','); + } + try self.serializer.newline(); + try self.serializer.indent(); + } else if (!self.shouldElideSpaces()) { + try self.serializer.space(); + } + } + try self.serializer.writer.writeByte('}'); + self.* = undefined; + } + + fn fieldPrefix(self: *Container, name: ?[]const u8) Error!void { + if (!self.empty) { + try self.serializer.writer.writeByte(','); + } + self.empty = false; + if (self.options.shouldWrap()) { + try self.serializer.newline(); + } else if (!self.shouldElideSpaces()) { + try self.serializer.space(); + } + if (self.options.shouldWrap()) try self.serializer.indent(); + if (name) |n| { + try self.serializer.ident(n); + try self.serializer.space(); + try self.serializer.writer.writeByte('='); + try self.serializer.space(); + 
} + } + + fn field( + self: *Container, + name: ?[]const u8, + val: anytype, + options: ValueOptions, + ) Error!void { + comptime assert(!typeIsRecursive(@TypeOf(val))); + try self.fieldArbitraryDepth(name, val, options); + } + + /// Returns `error.ExceededMaxDepth` if `depth` is exceeded. + fn fieldMaxDepth( + self: *Container, + name: ?[]const u8, + val: anytype, + options: ValueOptions, + depth: usize, + ) DepthError!void { + try checkValueDepth(val, depth); + try self.fieldArbitraryDepth(name, val, options); + } + + fn fieldArbitraryDepth( + self: *Container, + name: ?[]const u8, + val: anytype, + options: ValueOptions, + ) Error!void { + try self.fieldPrefix(name); + try self.serializer.valueArbitraryDepth(val, options); + } + + fn shouldElideSpaces(self: *const Container) bool { + return switch (self.options.whitespace_style) { + .fields => |fields| self.field_style != .named and fields == 1, + else => false, + }; + } +}; + +test Serializer { + var discarding: Writer.Discarding = .init(&.{}); + var s: Serializer = .{ .writer = &discarding.writer }; + var vec2 = try s.beginStruct(.{}); + try vec2.field("x", 1.5, .{}); + try vec2.fieldPrefix("prefix"); + try s.value(2.5, .{}); + try vec2.end(); +} + +inline fn typeIsRecursive(comptime T: type) bool { + return comptime typeIsRecursiveInner(T, &.{}); +} + +fn typeIsRecursiveInner(comptime T: type, comptime prev_visited: []const type) bool { + for (prev_visited) |V| { + if (V == T) return true; + } + const visited = prev_visited ++ .{T}; + + return switch (@typeInfo(T)) { + .pointer => |pointer| typeIsRecursiveInner(pointer.child, visited), + .optional => |optional| typeIsRecursiveInner(optional.child, visited), + .array => |array| typeIsRecursiveInner(array.child, visited), + .vector => |vector| typeIsRecursiveInner(vector.child, visited), + .@"struct" => |@"struct"| for (@"struct".fields) |field| { + if (typeIsRecursiveInner(field.type, visited)) break true; + } else false, + .@"union" => |@"union"| inline for 
(@"union".fields) |field| { + if (typeIsRecursiveInner(field.type, visited)) break true; + } else false, + else => false, + }; +} + +test typeIsRecursive { + try std.testing.expect(!typeIsRecursive(bool)); + try std.testing.expect(!typeIsRecursive(struct { x: i32, y: i32 })); + try std.testing.expect(!typeIsRecursive(struct { i32, i32 })); + try std.testing.expect(typeIsRecursive(struct { x: i32, y: i32, z: *@This() })); + try std.testing.expect(typeIsRecursive(struct { + a: struct { + const A = @This(); + b: struct { + c: *struct { + a: ?A, + }, + }, + }, + })); + try std.testing.expect(typeIsRecursive(struct { + a: [3]*@This(), + })); + try std.testing.expect(typeIsRecursive(struct { + a: union { a: i32, b: *@This() }, + })); +} + +fn checkValueDepth(val: anytype, depth: usize) error{ExceededMaxDepth}!void { + if (depth == 0) return error.ExceededMaxDepth; + const child_depth = depth - 1; + + switch (@typeInfo(@TypeOf(val))) { + .pointer => |pointer| switch (pointer.size) { + .one => try checkValueDepth(val.*, child_depth), + .slice => for (val) |item| { + try checkValueDepth(item, child_depth); + }, + .c, .many => {}, + }, + .array => for (val) |item| { + try checkValueDepth(item, child_depth); + }, + .@"struct" => |@"struct"| inline for (@"struct".fields) |field_info| { + try checkValueDepth(@field(val, field_info.name), child_depth); + }, + .@"union" => |@"union"| if (@"union".tag_type == null) { + return; + } else switch (val) { + inline else => |payload| { + return checkValueDepth(payload, child_depth); + }, + }, + .optional => if (val) |inner| try checkValueDepth(inner, child_depth), + else => {}, + } +} + +fn expectValueDepthEquals(expected: usize, v: anytype) !void { + try checkValueDepth(v, expected); + try std.testing.expectError(error.ExceededMaxDepth, checkValueDepth(v, expected - 1)); +} + +test checkValueDepth { + try expectValueDepthEquals(1, 10); + try expectValueDepthEquals(2, .{ .x = 1, .y = 2 }); + try expectValueDepthEquals(2, .{ 1, 2 }); + 
try expectValueDepthEquals(3, .{ 1, .{ 2, 3 } }); + try expectValueDepthEquals(3, .{ .{ 1, 2 }, 3 }); + try expectValueDepthEquals(3, .{ .x = 0, .y = 1, .z = .{ .x = 3 } }); + try expectValueDepthEquals(3, .{ .x = 0, .y = .{ .x = 1 }, .z = 2 }); + try expectValueDepthEquals(3, .{ .x = .{ .x = 0 }, .y = 1, .z = 2 }); + try expectValueDepthEquals(2, @as(?u32, 1)); + try expectValueDepthEquals(1, @as(?u32, null)); + try expectValueDepthEquals(1, null); + try expectValueDepthEquals(2, &1); + try expectValueDepthEquals(3, &@as(?u32, 1)); + + const Union = union(enum) { + x: u32, + y: struct { x: u32 }, + }; + try expectValueDepthEquals(2, Union{ .x = 1 }); + try expectValueDepthEquals(3, Union{ .y = .{ .x = 1 } }); + + const Recurse = struct { r: ?*const @This() }; + try expectValueDepthEquals(2, Recurse{ .r = null }); + try expectValueDepthEquals(5, Recurse{ .r = &Recurse{ .r = null } }); + try expectValueDepthEquals(8, Recurse{ .r = &Recurse{ .r = &Recurse{ .r = null } } }); + + try expectValueDepthEquals(2, @as([]const u8, &.{ 1, 2, 3 })); + try expectValueDepthEquals(3, @as([]const []const u8, &.{&.{ 1, 2, 3 }})); +} + +inline fn canSerializeType(T: type) bool { + comptime return canSerializeTypeInner(T, &.{}, false); +} + +fn canSerializeTypeInner( + T: type, + /// Visited structs and unions, to avoid infinite recursion. + /// Tracking more types is unnecessary, and a little complex due to optional nesting. 
+ visited: []const type, + parent_is_optional: bool, +) bool { + return switch (@typeInfo(T)) { + .bool, + .int, + .float, + .comptime_float, + .comptime_int, + .null, + .enum_literal, + => true, + + .noreturn, + .void, + .type, + .undefined, + .error_union, + .error_set, + .@"fn", + .frame, + .@"anyframe", + .@"opaque", + => false, + + .@"enum" => |@"enum"| @"enum".is_exhaustive, + + .pointer => |pointer| switch (pointer.size) { + .one => canSerializeTypeInner(pointer.child, visited, parent_is_optional), + .slice => canSerializeTypeInner(pointer.child, visited, false), + .many, .c => false, + }, + + .optional => |optional| if (parent_is_optional) + false + else + canSerializeTypeInner(optional.child, visited, true), + + .array => |array| canSerializeTypeInner(array.child, visited, false), + .vector => |vector| canSerializeTypeInner(vector.child, visited, false), + + .@"struct" => |@"struct"| { + for (visited) |V| if (T == V) return true; + const new_visited = visited ++ .{T}; + for (@"struct".fields) |field| { + if (!canSerializeTypeInner(field.type, new_visited, false)) return false; + } + return true; + }, + .@"union" => |@"union"| { + for (visited) |V| if (T == V) return true; + const new_visited = visited ++ .{T}; + if (@"union".tag_type == null) return false; + for (@"union".fields) |field| { + if (field.type != void and !canSerializeTypeInner(field.type, new_visited, false)) { + return false; + } + } + return true; + }, + }; +} + +test canSerializeType { + try std.testing.expect(!comptime canSerializeType(void)); + try std.testing.expect(!comptime canSerializeType(struct { f: [*]u8 })); + try std.testing.expect(!comptime canSerializeType(struct { error{foo} })); + try std.testing.expect(!comptime canSerializeType(union(enum) { a: void, f: [*c]u8 })); + try std.testing.expect(!comptime canSerializeType(@Vector(0, [*c]u8))); + try std.testing.expect(!comptime canSerializeType(*?[*c]u8)); + try std.testing.expect(!comptime canSerializeType(enum(u8) { _ })); + 
try std.testing.expect(!comptime canSerializeType(union { foo: void })); + try std.testing.expect(comptime canSerializeType(union(enum) { foo: void })); + try std.testing.expect(comptime canSerializeType(comptime_float)); + try std.testing.expect(comptime canSerializeType(comptime_int)); + try std.testing.expect(!comptime canSerializeType(struct { comptime foo: ??u8 = null })); + try std.testing.expect(comptime canSerializeType(@TypeOf(.foo))); + try std.testing.expect(comptime canSerializeType(?u8)); + try std.testing.expect(comptime canSerializeType(*?*u8)); + try std.testing.expect(comptime canSerializeType(?struct { + foo: ?struct { + ?union(enum) { + a: ?@Vector(0, ?*u8), + }, + ?struct { + f: ?[]?u8, + }, + }, + })); + try std.testing.expect(!comptime canSerializeType(??u8)); + try std.testing.expect(!comptime canSerializeType(?*?u8)); + try std.testing.expect(!comptime canSerializeType(*?*?*u8)); + try std.testing.expect(comptime canSerializeType(struct { x: comptime_int = 2 })); + try std.testing.expect(comptime canSerializeType(struct { x: comptime_float = 2 })); + try std.testing.expect(comptime canSerializeType(struct { comptime_int })); + try std.testing.expect(comptime canSerializeType(struct { comptime x: @TypeOf(.foo) = .foo })); + const Recursive = struct { foo: ?*@This() }; + try std.testing.expect(comptime canSerializeType(Recursive)); + + // Make sure we validate nested optional before we early out due to already having seen + // a type recursion! 
+ try std.testing.expect(!comptime canSerializeType(struct { + add_to_visited: ?u8, + retrieve_from_visited: ??u8, + })); +} diff --git a/lib/std/zon/parse.zig b/lib/std/zon/parse.zig index 171c189f5d..96a7fa6595 100644 --- a/lib/std/zon/parse.zig +++ b/lib/std/zon/parse.zig @@ -64,14 +64,14 @@ pub const Error = union(enum) { } }; - fn formatMessage(self: []const u8, w: *std.io.Writer) std.io.Writer.Error!void { + fn formatMessage(self: []const u8, w: *std.Io.Writer) std.Io.Writer.Error!void { // Just writes the string for now, but we're keeping this behind a formatter so we have // the option to extend it in the future to print more advanced messages (like `Error` // does) without breaking the API. try w.writeAll(self); } - pub fn fmtMessage(self: Note, diag: *const Diagnostics) std.fmt.Formatter([]const u8, Note.formatMessage) { + pub fn fmtMessage(self: Note, diag: *const Diagnostics) std.fmt.Alt([]const u8, Note.formatMessage) { return .{ .data = switch (self) { .zoir => |note| note.msg.get(diag.zoir), .type_check => |note| note.msg, @@ -147,14 +147,14 @@ pub const Error = union(enum) { diag: *const Diagnostics, }; - fn formatMessage(self: FormatMessage, w: *std.io.Writer) std.io.Writer.Error!void { + fn formatMessage(self: FormatMessage, w: *std.Io.Writer) std.Io.Writer.Error!void { switch (self.err) { .zoir => |err| try w.writeAll(err.msg.get(self.diag.zoir)), .type_check => |tc| try w.writeAll(tc.message), } } - pub fn fmtMessage(self: @This(), diag: *const Diagnostics) std.fmt.Formatter(FormatMessage, formatMessage) { + pub fn fmtMessage(self: @This(), diag: *const Diagnostics) std.fmt.Alt(FormatMessage, formatMessage) { return .{ .data = .{ .err = self, .diag = diag, @@ -226,7 +226,7 @@ pub const Diagnostics = struct { return .{ .diag = self }; } - pub fn format(self: *const @This(), w: *std.io.Writer) std.io.Writer.Error!void { + pub fn format(self: *const @This(), w: *std.Io.Writer) std.Io.Writer.Error!void { var errors = self.iterateErrors(); while 
(errors.next()) |err| { const loc = err.getLocation(self); diff --git a/lib/std/zon/stringify.zig b/lib/std/zon/stringify.zig index b6e7a4bcb7..785a303f22 100644 --- a/lib/std/zon/stringify.zig +++ b/lib/std/zon/stringify.zig @@ -22,14 +22,14 @@ const std = @import("std"); const assert = std.debug.assert; -const Writer = std.io.Writer; +const Writer = std.Io.Writer; +const Serializer = std.zon.Serializer; -/// Options for `serialize`. pub const SerializeOptions = struct { /// If false, whitespace is omitted. Otherwise whitespace is emitted in standard Zig style. whitespace: bool = true, /// Determines when to emit Unicode code point literals as opposed to integer literals. - emit_codepoint_literals: EmitCodepointLiterals = .never, + emit_codepoint_literals: Serializer.EmitCodepointLiterals = .never, /// If true, slices of `u8`s, and pointers to arrays of `u8` are serialized as containers. /// Otherwise they are serialized as string literals. emit_strings_as_containers: bool = false, @@ -93,102 +93,6 @@ pub fn serializeArbitraryDepth( }); } -inline fn typeIsRecursive(comptime T: type) bool { - return comptime typeIsRecursiveInner(T, &.{}); -} - -fn typeIsRecursiveInner(comptime T: type, comptime prev_visited: []const type) bool { - for (prev_visited) |V| { - if (V == T) return true; - } - const visited = prev_visited ++ .{T}; - - return switch (@typeInfo(T)) { - .pointer => |pointer| typeIsRecursiveInner(pointer.child, visited), - .optional => |optional| typeIsRecursiveInner(optional.child, visited), - .array => |array| typeIsRecursiveInner(array.child, visited), - .vector => |vector| typeIsRecursiveInner(vector.child, visited), - .@"struct" => |@"struct"| for (@"struct".fields) |field| { - if (typeIsRecursiveInner(field.type, visited)) break true; - } else false, - .@"union" => |@"union"| inline for (@"union".fields) |field| { - if (typeIsRecursiveInner(field.type, visited)) break true; - } else false, - else => false, - }; -} - -inline fn canSerializeType(T: type) 
bool { - comptime return canSerializeTypeInner(T, &.{}, false); -} - -fn canSerializeTypeInner( - T: type, - /// Visited structs and unions, to avoid infinite recursion. - /// Tracking more types is unnecessary, and a little complex due to optional nesting. - visited: []const type, - parent_is_optional: bool, -) bool { - return switch (@typeInfo(T)) { - .bool, - .int, - .float, - .comptime_float, - .comptime_int, - .null, - .enum_literal, - => true, - - .noreturn, - .void, - .type, - .undefined, - .error_union, - .error_set, - .@"fn", - .frame, - .@"anyframe", - .@"opaque", - => false, - - .@"enum" => |@"enum"| @"enum".is_exhaustive, - - .pointer => |pointer| switch (pointer.size) { - .one => canSerializeTypeInner(pointer.child, visited, parent_is_optional), - .slice => canSerializeTypeInner(pointer.child, visited, false), - .many, .c => false, - }, - - .optional => |optional| if (parent_is_optional) - false - else - canSerializeTypeInner(optional.child, visited, true), - - .array => |array| canSerializeTypeInner(array.child, visited, false), - .vector => |vector| canSerializeTypeInner(vector.child, visited, false), - - .@"struct" => |@"struct"| { - for (visited) |V| if (T == V) return true; - const new_visited = visited ++ .{T}; - for (@"struct".fields) |field| { - if (!canSerializeTypeInner(field.type, new_visited, false)) return false; - } - return true; - }, - .@"union" => |@"union"| { - for (visited) |V| if (T == V) return true; - const new_visited = visited ++ .{T}; - if (@"union".tag_type == null) return false; - for (@"union".fields) |field| { - if (field.type != void and !canSerializeTypeInner(field.type, new_visited, false)) { - return false; - } - } - return true; - }, - }; -} - fn isNestedOptional(T: type) bool { comptime switch (@typeInfo(T)) { .optional => |optional| return isNestedOptionalInner(optional.child), @@ -210,852 +114,13 @@ fn isNestedOptionalInner(T: type) bool { } } -test "std.zon stringify canSerializeType" { - try 
std.testing.expect(!comptime canSerializeType(void)); - try std.testing.expect(!comptime canSerializeType(struct { f: [*]u8 })); - try std.testing.expect(!comptime canSerializeType(struct { error{foo} })); - try std.testing.expect(!comptime canSerializeType(union(enum) { a: void, f: [*c]u8 })); - try std.testing.expect(!comptime canSerializeType(@Vector(0, [*c]u8))); - try std.testing.expect(!comptime canSerializeType(*?[*c]u8)); - try std.testing.expect(!comptime canSerializeType(enum(u8) { _ })); - try std.testing.expect(!comptime canSerializeType(union { foo: void })); - try std.testing.expect(comptime canSerializeType(union(enum) { foo: void })); - try std.testing.expect(comptime canSerializeType(comptime_float)); - try std.testing.expect(comptime canSerializeType(comptime_int)); - try std.testing.expect(!comptime canSerializeType(struct { comptime foo: ??u8 = null })); - try std.testing.expect(comptime canSerializeType(@TypeOf(.foo))); - try std.testing.expect(comptime canSerializeType(?u8)); - try std.testing.expect(comptime canSerializeType(*?*u8)); - try std.testing.expect(comptime canSerializeType(?struct { - foo: ?struct { - ?union(enum) { - a: ?@Vector(0, ?*u8), - }, - ?struct { - f: ?[]?u8, - }, - }, - })); - try std.testing.expect(!comptime canSerializeType(??u8)); - try std.testing.expect(!comptime canSerializeType(?*?u8)); - try std.testing.expect(!comptime canSerializeType(*?*?*u8)); - try std.testing.expect(comptime canSerializeType(struct { x: comptime_int = 2 })); - try std.testing.expect(comptime canSerializeType(struct { x: comptime_float = 2 })); - try std.testing.expect(comptime canSerializeType(struct { comptime_int })); - try std.testing.expect(comptime canSerializeType(struct { comptime x: @TypeOf(.foo) = .foo })); - const Recursive = struct { foo: ?*@This() }; - try std.testing.expect(comptime canSerializeType(Recursive)); - - // Make sure we validate nested optional before we early out due to already having seen - // a type recursion! 
- try std.testing.expect(!comptime canSerializeType(struct { - add_to_visited: ?u8, - retrieve_from_visited: ??u8, - })); -} - -test "std.zon typeIsRecursive" { - try std.testing.expect(!typeIsRecursive(bool)); - try std.testing.expect(!typeIsRecursive(struct { x: i32, y: i32 })); - try std.testing.expect(!typeIsRecursive(struct { i32, i32 })); - try std.testing.expect(typeIsRecursive(struct { x: i32, y: i32, z: *@This() })); - try std.testing.expect(typeIsRecursive(struct { - a: struct { - const A = @This(); - b: struct { - c: *struct { - a: ?A, - }, - }, - }, - })); - try std.testing.expect(typeIsRecursive(struct { - a: [3]*@This(), - })); - try std.testing.expect(typeIsRecursive(struct { - a: union { a: i32, b: *@This() }, - })); -} - -fn checkValueDepth(val: anytype, depth: usize) error{ExceededMaxDepth}!void { - if (depth == 0) return error.ExceededMaxDepth; - const child_depth = depth - 1; - - switch (@typeInfo(@TypeOf(val))) { - .pointer => |pointer| switch (pointer.size) { - .one => try checkValueDepth(val.*, child_depth), - .slice => for (val) |item| { - try checkValueDepth(item, child_depth); - }, - .c, .many => {}, - }, - .array => for (val) |item| { - try checkValueDepth(item, child_depth); - }, - .@"struct" => |@"struct"| inline for (@"struct".fields) |field_info| { - try checkValueDepth(@field(val, field_info.name), child_depth); - }, - .@"union" => |@"union"| if (@"union".tag_type == null) { - return; - } else switch (val) { - inline else => |payload| { - return checkValueDepth(payload, child_depth); - }, - }, - .optional => if (val) |inner| try checkValueDepth(inner, child_depth), - else => {}, - } -} - -fn expectValueDepthEquals(expected: usize, value: anytype) !void { - try checkValueDepth(value, expected); - try std.testing.expectError(error.ExceededMaxDepth, checkValueDepth(value, expected - 1)); -} - -test "std.zon checkValueDepth" { - try expectValueDepthEquals(1, 10); - try expectValueDepthEquals(2, .{ .x = 1, .y = 2 }); - try 
expectValueDepthEquals(2, .{ 1, 2 }); - try expectValueDepthEquals(3, .{ 1, .{ 2, 3 } }); - try expectValueDepthEquals(3, .{ .{ 1, 2 }, 3 }); - try expectValueDepthEquals(3, .{ .x = 0, .y = 1, .z = .{ .x = 3 } }); - try expectValueDepthEquals(3, .{ .x = 0, .y = .{ .x = 1 }, .z = 2 }); - try expectValueDepthEquals(3, .{ .x = .{ .x = 0 }, .y = 1, .z = 2 }); - try expectValueDepthEquals(2, @as(?u32, 1)); - try expectValueDepthEquals(1, @as(?u32, null)); - try expectValueDepthEquals(1, null); - try expectValueDepthEquals(2, &1); - try expectValueDepthEquals(3, &@as(?u32, 1)); - - const Union = union(enum) { - x: u32, - y: struct { x: u32 }, - }; - try expectValueDepthEquals(2, Union{ .x = 1 }); - try expectValueDepthEquals(3, Union{ .y = .{ .x = 1 } }); - - const Recurse = struct { r: ?*const @This() }; - try expectValueDepthEquals(2, Recurse{ .r = null }); - try expectValueDepthEquals(5, Recurse{ .r = &Recurse{ .r = null } }); - try expectValueDepthEquals(8, Recurse{ .r = &Recurse{ .r = &Recurse{ .r = null } } }); - - try expectValueDepthEquals(2, @as([]const u8, &.{ 1, 2, 3 })); - try expectValueDepthEquals(3, @as([]const []const u8, &.{&.{ 1, 2, 3 }})); -} - -/// Determines when to emit Unicode code point literals as opposed to integer literals. -pub const EmitCodepointLiterals = enum { - /// Never emit Unicode code point literals. - never, - /// Emit Unicode code point literals for any `u8` in the printable ASCII range. - printable_ascii, - /// Emit Unicode code point literals for any unsigned integer with 21 bits or fewer - /// whose value is a valid non-surrogate code point. - always, - - /// If the value should be emitted as a Unicode codepoint, return it as a u21. 
- fn emitAsCodepoint(self: @This(), val: anytype) ?u21 { - // Rule out incompatible integer types - switch (@typeInfo(@TypeOf(val))) { - .int => |int_info| if (int_info.signedness == .signed or int_info.bits > 21) { - return null; - }, - .comptime_int => {}, - else => comptime unreachable, - } - - // Return null if the value shouldn't be printed as a Unicode codepoint, or the value casted - // to a u21 if it should. - switch (self) { - .always => { - const c = std.math.cast(u21, val) orelse return null; - if (!std.unicode.utf8ValidCodepoint(c)) return null; - return c; - }, - .printable_ascii => { - const c = std.math.cast(u8, val) orelse return null; - if (!std.ascii.isPrint(c)) return null; - return c; - }, - .never => { - return null; - }, - } - } -}; - -/// Options for serialization of an individual value. -/// -/// See `SerializeOptions` for more information on these options. -pub const ValueOptions = struct { - emit_codepoint_literals: EmitCodepointLiterals = .never, - emit_strings_as_containers: bool = false, - emit_default_optional_fields: bool = true, -}; - -/// Options for manual serialization of container types. -pub const SerializeContainerOptions = struct { - /// The whitespace style that should be used for this container. Ignored if whitespace is off. - whitespace_style: union(enum) { - /// If true, wrap every field. If false do not. - wrap: bool, - /// Automatically decide whether to wrap or not based on the number of fields. Following - /// the standard rule of thumb, containers with more than two fields are wrapped. - fields: usize, - } = .{ .wrap = true }, - - fn shouldWrap(self: SerializeContainerOptions) bool { - return switch (self.whitespace_style) { - .wrap => |wrap| wrap, - .fields => |fields| fields > 2, - }; - } -}; - -/// Lower level control over serialization, you can create a new instance with `serializer`. 
-/// -/// Useful when you want control over which fields are serialized, how they're represented, -/// or want to write a ZON object that does not exist in memory. -/// -/// You can serialize values with `value`. To serialize recursive types, the following are provided: -/// * `valueMaxDepth` -/// * `valueArbitraryDepth` -/// -/// You can also serialize values using specific notations: -/// * `int` -/// * `float` -/// * `codePoint` -/// * `tuple` -/// * `tupleMaxDepth` -/// * `tupleArbitraryDepth` -/// * `string` -/// * `multilineString` -/// -/// For manual serialization of containers, see: -/// * `beginStruct` -/// * `beginTuple` -pub const Serializer = struct { - options: Options = .{}, - indent_level: u8 = 0, - writer: *Writer, - - pub const Error = Writer.Error; - pub const DepthError = Error || error{ExceededMaxDepth}; - - pub const Options = struct { - /// If false, only syntactically necessary whitespace is emitted. - whitespace: bool = true, - }; - - /// Serialize a value, similar to `serialize`. - pub fn value(self: *Serializer, val: anytype, options: ValueOptions) Error!void { - comptime assert(!typeIsRecursive(@TypeOf(val))); - return self.valueArbitraryDepth(val, options); - } - - /// Serialize a value, similar to `serializeMaxDepth`. - /// Can return `error.ExceededMaxDepth`. - pub fn valueMaxDepth(self: *Serializer, val: anytype, options: ValueOptions, depth: usize) DepthError!void { - try checkValueDepth(val, depth); - return self.valueArbitraryDepth(val, options); - } - - /// Serialize a value, similar to `serializeArbitraryDepth`. 
- pub fn valueArbitraryDepth(self: *Serializer, val: anytype, options: ValueOptions) Error!void { - comptime assert(canSerializeType(@TypeOf(val))); - switch (@typeInfo(@TypeOf(val))) { - .int, .comptime_int => if (options.emit_codepoint_literals.emitAsCodepoint(val)) |c| { - self.codePoint(c) catch |err| switch (err) { - error.InvalidCodepoint => unreachable, // Already validated - else => |e| return e, - }; - } else { - try self.int(val); - }, - .float, .comptime_float => try self.float(val), - .bool, .null => try std.fmt.format(self.writer, "{}", .{val}), - .enum_literal => try self.ident(@tagName(val)), - .@"enum" => try self.ident(@tagName(val)), - .pointer => |pointer| { - // Try to serialize as a string - const item: ?type = switch (@typeInfo(pointer.child)) { - .array => |array| array.child, - else => if (pointer.size == .slice) pointer.child else null, - }; - if (item == u8 and - (pointer.sentinel() == null or pointer.sentinel() == 0) and - !options.emit_strings_as_containers) - { - return try self.string(val); - } - - // Serialize as either a tuple or as the child type - switch (pointer.size) { - .slice => try self.tupleImpl(val, options), - .one => try self.valueArbitraryDepth(val.*, options), - else => comptime unreachable, - } - }, - .array => { - var container = try self.beginTuple( - .{ .whitespace_style = .{ .fields = val.len } }, - ); - for (val) |item_val| { - try container.fieldArbitraryDepth(item_val, options); - } - try container.end(); - }, - .@"struct" => |@"struct"| if (@"struct".is_tuple) { - var container = try self.beginTuple( - .{ .whitespace_style = .{ .fields = @"struct".fields.len } }, - ); - inline for (val) |field_value| { - try container.fieldArbitraryDepth(field_value, options); - } - try container.end(); - } else { - // Decide which fields to emit - const fields, const skipped: [@"struct".fields.len]bool = if (options.emit_default_optional_fields) b: { - break :b .{ @"struct".fields.len, @splat(false) }; - } else b: { - var 
fields = @"struct".fields.len; - var skipped: [@"struct".fields.len]bool = @splat(false); - inline for (@"struct".fields, &skipped) |field_info, *skip| { - if (field_info.default_value_ptr) |ptr| { - const default: *const field_info.type = @ptrCast(@alignCast(ptr)); - const field_value = @field(val, field_info.name); - if (std.meta.eql(field_value, default.*)) { - skip.* = true; - fields -= 1; - } - } - } - break :b .{ fields, skipped }; - }; - - // Emit those fields - var container = try self.beginStruct( - .{ .whitespace_style = .{ .fields = fields } }, - ); - inline for (@"struct".fields, skipped) |field_info, skip| { - if (!skip) { - try container.fieldArbitraryDepth( - field_info.name, - @field(val, field_info.name), - options, - ); - } - } - try container.end(); - }, - .@"union" => |@"union"| { - comptime assert(@"union".tag_type != null); - switch (val) { - inline else => |pl, tag| if (@TypeOf(pl) == void) - try self.writer.print(".{s}", .{@tagName(tag)}) - else { - var container = try self.beginStruct(.{ .whitespace_style = .{ .fields = 1 } }); - - try container.fieldArbitraryDepth( - @tagName(tag), - pl, - options, - ); - - try container.end(); - }, - } - }, - .optional => if (val) |inner| { - try self.valueArbitraryDepth(inner, options); - } else { - try self.writer.writeAll("null"); - }, - .vector => |vector| { - var container = try self.beginTuple( - .{ .whitespace_style = .{ .fields = vector.len } }, - ); - for (0..vector.len) |i| { - try container.fieldArbitraryDepth(val[i], options); - } - try container.end(); - }, - - else => comptime unreachable, - } - } - - /// Serialize an integer. - pub fn int(self: *Serializer, val: anytype) Error!void { - try self.writer.printIntOptions(val, 10, .lower, .{}); - } - - /// Serialize a float. 
- pub fn float(self: *Serializer, val: anytype) Error!void { - switch (@typeInfo(@TypeOf(val))) { - .float => if (std.math.isNan(val)) { - return self.writer.writeAll("nan"); - } else if (std.math.isPositiveInf(val)) { - return self.writer.writeAll("inf"); - } else if (std.math.isNegativeInf(val)) { - return self.writer.writeAll("-inf"); - } else if (std.math.isNegativeZero(val)) { - return self.writer.writeAll("-0.0"); - } else { - try std.fmt.format(self.writer, "{d}", .{val}); - }, - .comptime_float => if (val == 0) { - return self.writer.writeAll("0"); - } else { - try std.fmt.format(self.writer, "{d}", .{val}); - }, - else => comptime unreachable, - } - } - - /// Serialize `name` as an identifier prefixed with `.`. - /// - /// Escapes the identifier if necessary. - pub fn ident(self: *Serializer, name: []const u8) Error!void { - try self.writer.print(".{fp_}", .{std.zig.fmtId(name)}); - } - - pub const CodePointError = Error || error{InvalidCodepoint}; - - /// Serialize `val` as a Unicode codepoint. - /// - /// Returns `error.InvalidCodepoint` if `val` is not a valid Unicode codepoint. - pub fn codePoint(self: *Serializer, val: u21) CodePointError!void { - var buf: [8]u8 = undefined; - const len = std.unicode.utf8Encode(val, &buf) catch return error.InvalidCodepoint; - const str = buf[0..len]; - try std.fmt.format(self.writer, "'{f'}'", .{std.zig.fmtEscapes(str)}); - } - - /// Like `value`, but always serializes `val` as a tuple. - /// - /// Will fail at comptime if `val` is not a tuple, array, pointer to an array, or slice. - pub fn tuple(self: *Serializer, val: anytype, options: ValueOptions) Error!void { - comptime assert(!typeIsRecursive(@TypeOf(val))); - try self.tupleArbitraryDepth(val, options); - } - - /// Like `tuple`, but recursive types are allowed. - /// - /// Returns `error.ExceededMaxDepth` if `depth` is exceeded. 
- pub fn tupleMaxDepth( - self: *Serializer, - val: anytype, - options: ValueOptions, - depth: usize, - ) DepthError!void { - try checkValueDepth(val, depth); - try self.tupleArbitraryDepth(val, options); - } - - /// Like `tuple`, but recursive types are allowed. - /// - /// It is the caller's responsibility to ensure that `val` does not contain cycles. - pub fn tupleArbitraryDepth( - self: *Serializer, - val: anytype, - options: ValueOptions, - ) Error!void { - try self.tupleImpl(val, options); - } - - fn tupleImpl(self: *Serializer, val: anytype, options: ValueOptions) Error!void { - comptime assert(canSerializeType(@TypeOf(val))); - switch (@typeInfo(@TypeOf(val))) { - .@"struct" => { - var container = try self.beginTuple(.{ .whitespace_style = .{ .fields = val.len } }); - inline for (val) |item_val| { - try container.fieldArbitraryDepth(item_val, options); - } - try container.end(); - }, - .pointer, .array => { - var container = try self.beginTuple(.{ .whitespace_style = .{ .fields = val.len } }); - for (val) |item_val| { - try container.fieldArbitraryDepth(item_val, options); - } - try container.end(); - }, - else => comptime unreachable, - } - } - - /// Like `value`, but always serializes `val` as a string. - pub fn string(self: *Serializer, val: []const u8) Error!void { - try std.fmt.format(self.writer, "\"{f}\"", .{std.zig.fmtEscapes(val)}); - } - - /// Options for formatting multiline strings. - pub const MultilineStringOptions = struct { - /// If top level is true, whitespace before and after the multiline string is elided. - /// If it is true, a newline is printed, then the value, followed by a newline, and if - /// whitespace is true any necessary indentation follows. - top_level: bool = false, - }; - - pub const MultilineStringError = Error || error{InnerCarriageReturn}; - - /// Like `value`, but always serializes to a multiline string literal. 
- /// - /// Returns `error.InnerCarriageReturn` if `val` contains a CR not followed by a newline, - /// since multiline strings cannot represent CR without a following newline. - pub fn multilineString( - self: *Serializer, - val: []const u8, - options: MultilineStringOptions, - ) MultilineStringError!void { - // Make sure the string does not contain any carriage returns not followed by a newline - var i: usize = 0; - while (i < val.len) : (i += 1) { - if (val[i] == '\r') { - if (i + 1 < val.len) { - if (val[i + 1] == '\n') { - i += 1; - continue; - } - } - return error.InnerCarriageReturn; - } - } - - if (!options.top_level) { - try self.newline(); - try self.indent(); - } - - try self.writer.writeAll("\\\\"); - for (val) |c| { - if (c != '\r') { - try self.writer.writeByte(c); // We write newlines here even if whitespace off - if (c == '\n') { - try self.indent(); - try self.writer.writeAll("\\\\"); - } - } - } - - if (!options.top_level) { - try self.writer.writeByte('\n'); // Even if whitespace off - try self.indent(); - } - } - - /// Create a `Struct` for writing ZON structs field by field. - pub fn beginStruct( - self: *Serializer, - options: SerializeContainerOptions, - ) Error!Struct { - return Struct.begin(self, options); - } - - /// Creates a `Tuple` for writing ZON tuples field by field. 
- pub fn beginTuple( - self: *Serializer, - options: SerializeContainerOptions, - ) Error!Tuple { - return Tuple.begin(self, options); - } - - fn indent(self: *Serializer) Error!void { - if (self.options.whitespace) { - try self.writer.splatByteAll(' ', 4 * self.indent_level); - } - } - - fn newline(self: *Serializer) Error!void { - if (self.options.whitespace) { - try self.writer.writeByte('\n'); - } - } - - fn newlineOrSpace(self: *Serializer, len: usize) Error!void { - if (self.containerShouldWrap(len)) { - try self.newline(); - } else { - try self.space(); - } - } - - fn space(self: *Serializer) Error!void { - if (self.options.whitespace) { - try self.writer.writeByte(' '); - } - } - - /// Writes ZON tuples field by field. - pub const Tuple = struct { - container: Container, - - fn begin(parent: *Serializer, options: SerializeContainerOptions) Error!Tuple { - return .{ - .container = try Container.begin(parent, .anon, options), - }; - } - - /// Finishes serializing the tuple. - /// - /// Prints a trailing comma as configured when appropriate, and the closing bracket. - pub fn end(self: *Tuple) Error!void { - try self.container.end(); - self.* = undefined; - } - - /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `value`. - pub fn field( - self: *Tuple, - val: anytype, - options: ValueOptions, - ) Error!void { - try self.container.field(null, val, options); - } - - /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `valueMaxDepth`. - /// Returns `error.ExceededMaxDepth` if `depth` is exceeded. - pub fn fieldMaxDepth( - self: *Tuple, - val: anytype, - options: ValueOptions, - depth: usize, - ) DepthError!void { - try self.container.fieldMaxDepth(null, val, options, depth); - } - - /// Serialize a field. Equivalent to calling `fieldPrefix` followed by - /// `valueArbitraryDepth`. 
- pub fn fieldArbitraryDepth( - self: *Tuple, - val: anytype, - options: ValueOptions, - ) Error!void { - try self.container.fieldArbitraryDepth(null, val, options); - } - - /// Starts a field with a struct as a value. Returns the struct. - pub fn beginStructField( - self: *Tuple, - options: SerializeContainerOptions, - ) Error!Struct { - try self.fieldPrefix(); - return self.container.serializer.beginStruct(options); - } - - /// Starts a field with a tuple as a value. Returns the tuple. - pub fn beginTupleField( - self: *Tuple, - options: SerializeContainerOptions, - ) Error!Tuple { - try self.fieldPrefix(); - return self.container.serializer.beginTuple(options); - } - - /// Print a field prefix. This prints any necessary commas, and whitespace as - /// configured. Useful if you want to serialize the field value yourself. - pub fn fieldPrefix(self: *Tuple) Error!void { - try self.container.fieldPrefix(null); - } - }; - - /// Writes ZON structs field by field. - pub const Struct = struct { - container: Container, - - fn begin(parent: *Serializer, options: SerializeContainerOptions) Error!Struct { - return .{ - .container = try Container.begin(parent, .named, options), - }; - } - - /// Finishes serializing the struct. - /// - /// Prints a trailing comma as configured when appropriate, and the closing bracket. - pub fn end(self: *Struct) Error!void { - try self.container.end(); - self.* = undefined; - } - - /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `value`. - pub fn field( - self: *Struct, - name: []const u8, - val: anytype, - options: ValueOptions, - ) Error!void { - try self.container.field(name, val, options); - } - - /// Serialize a field. Equivalent to calling `fieldPrefix` followed by `valueMaxDepth`. - /// Returns `error.ExceededMaxDepth` if `depth` is exceeded. 
- pub fn fieldMaxDepth( - self: *Struct, - name: []const u8, - val: anytype, - options: ValueOptions, - depth: usize, - ) DepthError!void { - try self.container.fieldMaxDepth(name, val, options, depth); - } - - /// Serialize a field. Equivalent to calling `fieldPrefix` followed by - /// `valueArbitraryDepth`. - pub fn fieldArbitraryDepth( - self: *Struct, - name: []const u8, - val: anytype, - options: ValueOptions, - ) Error!void { - try self.container.fieldArbitraryDepth(name, val, options); - } - - /// Starts a field with a struct as a value. Returns the struct. - pub fn beginStructField( - self: *Struct, - name: []const u8, - options: SerializeContainerOptions, - ) Error!Struct { - try self.fieldPrefix(name); - return self.container.serializer.beginStruct(options); - } - - /// Starts a field with a tuple as a value. Returns the tuple. - pub fn beginTupleField( - self: *Struct, - name: []const u8, - options: SerializeContainerOptions, - ) Error!Tuple { - try self.fieldPrefix(name); - return self.container.serializer.beginTuple(options); - } - - /// Print a field prefix. This prints any necessary commas, the field name (escaped if - /// necessary) and whitespace as configured. Useful if you want to serialize the field - /// value yourself. 
- pub fn fieldPrefix(self: *Struct, name: []const u8) Error!void { - try self.container.fieldPrefix(name); - } - }; - - const Container = struct { - const FieldStyle = enum { named, anon }; - - serializer: *Serializer, - field_style: FieldStyle, - options: SerializeContainerOptions, - empty: bool, - - fn begin( - sz: *Serializer, - field_style: FieldStyle, - options: SerializeContainerOptions, - ) Error!Container { - if (options.shouldWrap()) sz.indent_level +|= 1; - try sz.writer.writeAll(".{"); - return .{ - .serializer = sz, - .field_style = field_style, - .options = options, - .empty = true, - }; - } - - fn end(self: *Container) Error!void { - if (self.options.shouldWrap()) self.serializer.indent_level -|= 1; - if (!self.empty) { - if (self.options.shouldWrap()) { - if (self.serializer.options.whitespace) { - try self.serializer.writer.writeByte(','); - } - try self.serializer.newline(); - try self.serializer.indent(); - } else if (!self.shouldElideSpaces()) { - try self.serializer.space(); - } - } - try self.serializer.writer.writeByte('}'); - self.* = undefined; - } - - fn fieldPrefix(self: *Container, name: ?[]const u8) Error!void { - if (!self.empty) { - try self.serializer.writer.writeByte(','); - } - self.empty = false; - if (self.options.shouldWrap()) { - try self.serializer.newline(); - } else if (!self.shouldElideSpaces()) { - try self.serializer.space(); - } - if (self.options.shouldWrap()) try self.serializer.indent(); - if (name) |n| { - try self.serializer.ident(n); - try self.serializer.space(); - try self.serializer.writer.writeByte('='); - try self.serializer.space(); - } - } - - fn field( - self: *Container, - name: ?[]const u8, - val: anytype, - options: ValueOptions, - ) Error!void { - comptime assert(!typeIsRecursive(@TypeOf(val))); - try self.fieldArbitraryDepth(name, val, options); - } - - /// Returns `error.ExceededMaxDepth` if `depth` is exceeded. 
- fn fieldMaxDepth( - self: *Container, - name: ?[]const u8, - val: anytype, - options: ValueOptions, - depth: usize, - ) DepthError!void { - try checkValueDepth(val, depth); - try self.fieldArbitraryDepth(name, val, options); - } - - fn fieldArbitraryDepth( - self: *Container, - name: ?[]const u8, - val: anytype, - options: ValueOptions, - ) Error!void { - try self.fieldPrefix(name); - try self.serializer.valueArbitraryDepth(val, options); - } - - fn shouldElideSpaces(self: *const Container) bool { - return switch (self.options.whitespace_style) { - .fields => |fields| self.field_style != .named and fields == 1, - else => false, - }; - } - }; -}; - -test Serializer { - var w: Writer = .discarding(&.{}); - var s: Serializer = .{ .writer = &w }; - var vec2 = try s.beginStruct(.{}); - try vec2.field("x", 1.5, .{}); - try vec2.fieldPrefix("prefix"); - try s.value(2.5, .{}); - try vec2.end(); -} - fn expectSerializeEqual( expected: []const u8, value: anytype, options: SerializeOptions, ) !void { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - const bw = &aw.interface; + var aw: Writer.Allocating = .init(std.testing.allocator); + const bw = &aw.writer; defer aw.deinit(); try serialize(value, options, bw); @@ -1156,8 +221,8 @@ test "std.zon stringify whitespace, high level API" { } test "std.zon stringify whitespace, low level API" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = .init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.writer }; defer aw.deinit(); for ([2]bool{ true, false }) |whitespace| { @@ -1513,8 +578,8 @@ test "std.zon stringify whitespace, low level API" { } test "std.zon stringify utf8 codepoints" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = .init(std.testing.allocator); + var s: Serializer = .{ .writer = 
&aw.writer }; defer aw.deinit(); // Printable ASCII @@ -1565,11 +630,11 @@ test "std.zon stringify utf8 codepoints" { aw.clearRetainingCapacity(); try s.codePoint('⚡'); - try std.testing.expectEqualStrings("'\\xe2\\x9a\\xa1'", aw.getWritten()); + try std.testing.expectEqualStrings("'\\u{26a1}'", aw.getWritten()); aw.clearRetainingCapacity(); try s.value('⚡', .{ .emit_codepoint_literals = .always }); - try std.testing.expectEqualStrings("'\\xe2\\x9a\\xa1'", aw.getWritten()); + try std.testing.expectEqualStrings("'\\u{26a1}'", aw.getWritten()); aw.clearRetainingCapacity(); try s.value('⚡', .{ .emit_codepoint_literals = .printable_ascii }); @@ -1581,7 +646,9 @@ test "std.zon stringify utf8 codepoints" { aw.clearRetainingCapacity(); // Invalid codepoint - try std.testing.expectError(error.InvalidCodepoint, s.codePoint(0x110000 + 1)); + try s.codePoint(0x110000 + 1); + try std.testing.expectEqualStrings("'\\u{110001}'", aw.getWritten()); + aw.clearRetainingCapacity(); try s.int(0x110000 + 1); try std.testing.expectEqualStrings("1114113", aw.getWritten()); @@ -1614,7 +681,7 @@ test "std.zon stringify utf8 codepoints" { // Make sure value options are passed to children try s.value(.{ .c = '⚡' }, .{ .emit_codepoint_literals = .always }); - try std.testing.expectEqualStrings(".{ .c = '\\xe2\\x9a\\xa1' }", aw.getWritten()); + try std.testing.expectEqualStrings(".{ .c = '\\u{26a1}' }", aw.getWritten()); aw.clearRetainingCapacity(); try s.value(.{ .c = '⚡' }, .{ .emit_codepoint_literals = .never }); @@ -1623,8 +690,8 @@ test "std.zon stringify utf8 codepoints" { } test "std.zon stringify strings" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = .init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.writer }; defer aw.deinit(); // Minimal case @@ -1693,8 +760,8 @@ test "std.zon stringify strings" { } test "std.zon stringify multiline strings" { - var aw: 
std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = .init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.writer }; defer aw.deinit(); inline for (.{ true, false }) |whitespace| { @@ -1913,8 +980,8 @@ test "std.zon stringify skip default fields" { } test "std.zon depth limits" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - const bw = &aw.interface; + var aw: Writer.Allocating = .init(std.testing.allocator); + const bw = &aw.writer; defer aw.deinit(); const Recurse = struct { r: []const @This() }; @@ -2174,8 +1241,8 @@ test "std.zon stringify primitives" { } test "std.zon stringify ident" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = .init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.writer }; defer aw.deinit(); try expectSerializeEqual(".{ .a = 0 }", .{ .a = 0 }, .{}); @@ -2221,8 +1288,8 @@ test "std.zon stringify ident" { } test "std.zon stringify as tuple" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = .init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.writer }; defer aw.deinit(); // Tuples @@ -2242,8 +1309,8 @@ test "std.zon stringify as tuple" { } test "std.zon stringify as float" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = .init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.writer }; defer aw.deinit(); // Comptime float @@ -2346,8 +1413,8 @@ test "std.zon pointers" { } test "std.zon tuple/struct field" { - var aw: std.io.Writer.Allocating = .init(std.testing.allocator); - var s: Serializer = .{ .writer = &aw.interface }; + var aw: Writer.Allocating = 
.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.writer }; defer aw.deinit(); // Test on structs diff --git a/src/Zcu.zig b/src/Zcu.zig index 897bb6e89e..d337f0b943 100644 --- a/src/Zcu.zig +++ b/src/Zcu.zig @@ -2821,7 +2821,7 @@ pub fn loadZirCache(gpa: Allocator, cache_file: std.fs.File) !Zir { var buffer: [2000]u8 = undefined; var file_reader = cache_file.reader(&buffer); return result: { - const header = file_reader.interface.takeStructReference(Zir.Header) catch |err| break :result err; + const header = file_reader.interface.takeStructPointer(Zir.Header) catch |err| break :result err; break :result loadZirCacheBody(gpa, header.*, &file_reader.interface); } catch |err| switch (err) { error.ReadFailed => return file_reader.err.?, diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index dc2308add1..26f008e1c8 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -349,7 +349,7 @@ fn loadZirZoirCache( const cache_br = &cache_fr.interface; // First we read the header to determine the lengths of arrays. - const header = (cache_br.takeStructReference(Header) catch |err| switch (err) { + const header = (cache_br.takeStructPointer(Header) catch |err| switch (err) { error.ReadFailed => return cache_fr.err.?, // This can happen if Zig bails out of this function between creating // the cached file and writing it. 
diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 955a83bc33..c726c05e1b 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -2438,7 +2438,10 @@ pub const DeclGen = struct { const ty = val.typeOf(zcu); return .{ .data = .{ .dg = dg, - .int_info = ty.intInfo(zcu), + .int_info = if (ty.zigTypeTag(zcu) == .@"union" and ty.containerLayout(zcu) == .@"packed") + .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) } + else + ty.intInfo(zcu), .kind = kind, .ctype = try dg.ctypeFromType(ty, kind), .val = val, diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index b3391cfb6a..a570dd5ec0 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -6385,6 +6385,9 @@ pub const FuncGen = struct { // * https://github.com/llvm/llvm-project/blob/56905dab7da50bccfcceaeb496b206ff476127e1/llvm/test/MC/WebAssembly/blockaddress.ll if (zcu.comp.getTarget().cpu.arch.isWasm()) break :jmp_table null; + // Workaround for https://github.com/ziglang/zig/issues/24383: + if (self.ng.ownerModule().optimize_mode == .ReleaseSafe) break :jmp_table null; + // On a 64-bit target, 1024 pointers in our jump table is about 8K of pointers. This seems just // about acceptable - it won't fill L1d cache on most CPUs. 
const max_table_len = 1024; diff --git a/src/main.zig b/src/main.zig index dc931f119b..7ad40e1a68 100644 --- a/src/main.zig +++ b/src/main.zig @@ -346,8 +346,9 @@ fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { } else if (mem.eql(u8, cmd, "targets")) { dev.check(.targets_command); const host = std.zig.resolveTargetQueryOrFatal(.{}); - const stdout = fs.File.stdout().deprecatedWriter(); - return @import("print_targets.zig").cmdTargets(arena, cmd_args, stdout, &host); + var stdout_writer = fs.File.stdout().writer(&stdout_buffer); + try @import("print_targets.zig").cmdTargets(arena, cmd_args, &stdout_writer.interface, &host); + return stdout_writer.interface.flush(); } else if (mem.eql(u8, cmd, "version")) { dev.check(.version_command); try fs.File.stdout().writeAll(build_options.version ++ "\n"); @@ -358,7 +359,9 @@ fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { } else if (mem.eql(u8, cmd, "env")) { dev.check(.env_command); verifyLibcxxCorrectlyLinked(); - return @import("print_env.zig").cmdEnv(arena, cmd_args); + var stdout_writer = fs.File.stdout().writer(&stdout_buffer); + try @import("print_env.zig").cmdEnv(arena, &stdout_writer.interface); + return stdout_writer.interface.flush(); } else if (mem.eql(u8, cmd, "reduce")) { return jitCmd(gpa, arena, cmd_args, .{ .cmd_name = "reduce", diff --git a/src/print_env.zig b/src/print_env.zig index e76fceb3bd..d1251c0d62 100644 --- a/src/print_env.zig +++ b/src/print_env.zig @@ -4,8 +4,7 @@ const introspect = @import("introspect.zig"); const Allocator = std.mem.Allocator; const fatal = std.process.fatal; -pub fn cmdEnv(arena: Allocator, args: []const []const u8) !void { - _ = args; +pub fn cmdEnv(arena: Allocator, out: *std.Io.Writer) !void { const cwd_path = try introspect.getResolvedCwd(arena); const self_exe_path = try std.fs.selfExePathAlloc(arena); @@ -21,41 +20,21 @@ pub fn cmdEnv(arena: Allocator, args: []const []const u8) !void { const host = try 
std.zig.system.resolveTargetQuery(.{}); const triple = try host.zigTriple(arena); - var buffer: [1024]u8 = undefined; - var stdout_writer = std.fs.File.stdout().writer(&buffer); - const w = &stdout_writer.interface(); - var jws: std.json.Stringify = .{ .writer = w, .options = .{ .whitespace = .indent_1 } }; + var serializer: std.zon.Serializer = .{ .writer = out }; + var root = try serializer.beginStruct(.{}); - try jws.beginObject(); - - try jws.objectField("zig_exe"); - try jws.write(self_exe_path); - - try jws.objectField("lib_dir"); - try jws.write(zig_lib_directory.path.?); - - try jws.objectField("std_dir"); - try jws.write(zig_std_dir); - - try jws.objectField("global_cache_dir"); - try jws.write(global_cache_dir); - - try jws.objectField("version"); - try jws.write(build_options.version); - - try jws.objectField("target"); - try jws.write(triple); - - try jws.objectField("env"); - try jws.beginObject(); + try root.field("zig_exe", self_exe_path, .{}); + try root.field("lib_dir", zig_lib_directory.path.?, .{}); + try root.field("std_dir", zig_std_dir, .{}); + try root.field("global_cache_dir", global_cache_dir, .{}); + try root.field("version", build_options.version, .{}); + try root.field("target", triple, .{}); + var env = try root.beginStructField("env", .{}); inline for (@typeInfo(std.zig.EnvVar).@"enum".fields) |field| { - try jws.objectField(field.name); - try jws.write(try @field(std.zig.EnvVar, field.name).get(arena)); + try env.field(field.name, try @field(std.zig.EnvVar, field.name).get(arena), .{}); } - try jws.endObject(); + try env.end(); + try root.end(); - try jws.endObject(); - try w.writeByte('\n'); - - try w.flush(); + try out.writeByte('\n'); } diff --git a/src/print_targets.zig b/src/print_targets.zig index c46d11cd8a..e04842bb7e 100644 --- a/src/print_targets.zig +++ b/src/print_targets.zig @@ -10,38 +10,37 @@ const target = @import("target.zig"); const assert = std.debug.assert; const glibc = @import("libs/glibc.zig"); const introspect 
= @import("introspect.zig"); -const Writer = std.io.Writer; -pub fn cmdTargets(arena: Allocator, args: []const []const u8) !void { +pub fn cmdTargets( + allocator: Allocator, + args: []const []const u8, + out: *std.Io.Writer, + native_target: *const Target, +) !void { _ = args; - const host = std.zig.resolveTargetQueryOrFatal(.{}); - var buffer: [1024]u8 = undefined; - var bw = fs.File.stdout().writer().buffered(&buffer); - try print(arena, &bw, host); - try bw.flush(); -} - -fn print(arena: Allocator, output: *Writer, host: *const Target) Writer.Error!void { - var zig_lib_directory = introspect.findZigLibDir(arena) catch |err| { + var zig_lib_directory = introspect.findZigLibDir(allocator) catch |err| { fatal("unable to find zig installation directory: {s}\n", .{@errorName(err)}); }; defer zig_lib_directory.handle.close(); + defer allocator.free(zig_lib_directory.path.?); const abilists_contents = zig_lib_directory.handle.readFileAlloc( + allocator, glibc.abilists_path, - arena, - .limited(glibc.abilists_max_size), + glibc.abilists_max_size, ) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, else => fatal("unable to read " ++ glibc.abilists_path ++ ": {s}", .{@errorName(err)}), }; + defer allocator.free(abilists_contents); - const glibc_abi = try glibc.loadMetaData(arena, abilists_contents); + const glibc_abi = try glibc.loadMetaData(allocator, abilists_contents); + defer glibc_abi.destroy(allocator); - var sz: std.zon.stringify.Serializer = .{ .writer = output }; + var serializer: std.zon.Serializer = .{ .writer = out }; { - var root_obj = try sz.beginStruct(.{}); + var root_obj = try serializer.beginStruct(.{}); try root_obj.field("arch", meta.fieldNames(Target.Cpu.Arch), .{}); try root_obj.field("os", meta.fieldNames(Target.Os.Tag), .{}); @@ -50,9 +49,10 @@ fn print(arena: Allocator, output: *Writer, host: *const Target) Writer.Error!vo { var libc_obj = try root_obj.beginTupleField("libc", .{}); for (std.zig.target.available_libcs) 
|libc| { - const tmp = try std.fmt.allocPrint(arena, "{s}-{s}-{s}", .{ + const tmp = try std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ @tagName(libc.arch), @tagName(libc.os), @tagName(libc.abi), }); + defer allocator.free(tmp); try libc_obj.field(tmp, .{}); } try libc_obj.end(); @@ -61,7 +61,8 @@ fn print(arena: Allocator, output: *Writer, host: *const Target) Writer.Error!vo { var glibc_obj = try root_obj.beginTupleField("glibc", .{}); for (glibc_abi.all_versions) |ver| { - const tmp = try std.fmt.allocPrint(arena, "{f}", .{ver}); + const tmp = try std.fmt.allocPrint(allocator, "{f}", .{ver}); + defer allocator.free(tmp); try glibc_obj.field(tmp, .{}); } try glibc_obj.end(); @@ -101,20 +102,21 @@ fn print(arena: Allocator, output: *Writer, host: *const Target) Writer.Error!vo { var native_obj = try root_obj.beginStructField("native", .{}); { - const triple = try host.zigTriple(arena); + const triple = try native_target.zigTriple(allocator); + defer allocator.free(triple); try native_obj.field("triple", triple, .{}); } { var cpu_obj = try native_obj.beginStructField("cpu", .{}); - try cpu_obj.field("arch", @tagName(host.cpu.arch), .{}); + try cpu_obj.field("arch", @tagName(native_target.cpu.arch), .{}); - try cpu_obj.field("name", host.cpu.model.name, .{}); + try cpu_obj.field("name", native_target.cpu.model.name, .{}); { var features = try native_obj.beginTupleField("features", .{}); - for (host.cpu.arch.allFeaturesList(), 0..) |feature, i_usize| { + for (native_target.cpu.arch.allFeaturesList(), 0..) 
|feature, i_usize| { const index = @as(Target.Cpu.Feature.Set.Index, @intCast(i_usize)); - if (host.cpu.features.isEnabled(index)) { + if (native_target.cpu.features.isEnabled(index)) { try features.field(feature.name, .{}); } } @@ -123,13 +125,13 @@ fn print(arena: Allocator, output: *Writer, host: *const Target) Writer.Error!vo try cpu_obj.end(); } - try native_obj.field("os", @tagName(host.os.tag), .{}); - try native_obj.field("abi", @tagName(host.abi), .{}); + try native_obj.field("os", @tagName(native_target.os.tag), .{}); + try native_obj.field("abi", @tagName(native_target.abi), .{}); try native_obj.end(); } try root_obj.end(); } - try output.writeByte('\n'); + try out.writeByte('\n'); } diff --git a/src/translate_c.zig b/src/translate_c.zig index 301e0a219d..f1f3ad8659 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -3338,7 +3338,7 @@ fn transPredefinedExpr(c: *Context, scope: *Scope, expr: *const clang.Predefined fn transCreateCharLitNode(c: *Context, narrow: bool, val: u32) TransError!Node { return Tag.char_literal.create(c.arena, if (narrow) - try std.fmt.allocPrint(c.arena, "'{f}'", .{std.zig.fmtChar(&.{@as(u8, @intCast(val))})}) + try std.fmt.allocPrint(c.arena, "'{f}'", .{std.zig.fmtChar(@intCast(val))}) else try std.fmt.allocPrint(c.arena, "'\\u{{{x}}}'", .{val})); } diff --git a/test/behavior/type_info.zig b/test/behavior/type_info.zig index f19d90696b..aea8cbb6aa 100644 --- a/test/behavior/type_info.zig +++ b/test/behavior/type_info.zig @@ -539,20 +539,6 @@ fn add(a: i32, b: i32) i32 { return a + b; } -test "type info for async frames" { - if (true) { - // https://github.com/ziglang/zig/issues/6025 - return error.SkipZigTest; - } - - switch (@typeInfo(@Frame(add))) { - .frame => |frame| { - try expect(@as(@TypeOf(add), @ptrCast(frame.function)) == add); - }, - else => unreachable, - } -} - test "Declarations are returned in declaration order" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if 
(builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; diff --git a/test/cases/compile_errors/async/Frame_of_generic_function.zig b/test/cases/compile_errors/async/Frame_of_generic_function.zig deleted file mode 100644 index af0fb5c72e..0000000000 --- a/test/cases/compile_errors/async/Frame_of_generic_function.zig +++ /dev/null @@ -1,14 +0,0 @@ -export fn entry() void { - var frame: @Frame(func) = undefined; - _ = &frame; -} -fn func(comptime T: type) void { - var x: T = undefined; - _ = &x; -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:2:16: error: @Frame() of generic function diff --git a/test/cases/compile_errors/async/bad_alignment_in_asynccall.zig b/test/cases/compile_errors/async/bad_alignment_in_asynccall.zig deleted file mode 100644 index c30815e7e0..0000000000 --- a/test/cases/compile_errors/async/bad_alignment_in_asynccall.zig +++ /dev/null @@ -1,13 +0,0 @@ -export fn entry() void { - var ptr: fn () callconv(.@"async") void = func; - var bytes: [64]u8 = undefined; - _ = @asyncCall(&bytes, {}, ptr, .{}); - _ = &ptr; -} -fn func() callconv(.@"async") void {} - -// error -// backend=stage1 -// target=aarch64-linux-none -// -// tmp.zig:4:21: error: expected type '[]align(8) u8', found '*[64]u8' diff --git a/test/cases/compile_errors/async/exported_async_function.zig b/test/cases/compile_errors/async/exported_async_function.zig deleted file mode 100644 index c3be7d4b80..0000000000 --- a/test/cases/compile_errors/async/exported_async_function.zig +++ /dev/null @@ -1,7 +0,0 @@ -export fn foo() callconv(.@"async") void {} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:1:1: error: exported function cannot be async diff --git a/test/cases/compile_errors/async/frame_called_outside_of_function_definition.zig b/test/cases/compile_errors/async/frame_called_outside_of_function_definition.zig deleted file mode 100644 index d140998152..0000000000 --- 
a/test/cases/compile_errors/async/frame_called_outside_of_function_definition.zig +++ /dev/null @@ -1,11 +0,0 @@ -var handle_undef: anyframe = undefined; -var handle_dummy: anyframe = @frame(); -export fn entry() bool { - return handle_undef == handle_dummy; -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:2:30: error: @frame() called outside of function definition diff --git a/test/cases/compile_errors/async/frame_causes_function_to_be_async.zig b/test/cases/compile_errors/async/frame_causes_function_to_be_async.zig deleted file mode 100644 index f8493b08b2..0000000000 --- a/test/cases/compile_errors/async/frame_causes_function_to_be_async.zig +++ /dev/null @@ -1,13 +0,0 @@ -export fn entry() void { - func(); -} -fn func() void { - _ = @frame(); -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:1:1: error: function with calling convention 'C' cannot be async -// tmp.zig:5:9: note: @frame() causes function to be async diff --git a/test/cases/compile_errors/async/non-async_function_pointer_eventually_is_inferred_to_become_async.zig b/test/cases/compile_errors/async/non-async_function_pointer_eventually_is_inferred_to_become_async.zig deleted file mode 100644 index e18b420028..0000000000 --- a/test/cases/compile_errors/async/non-async_function_pointer_eventually_is_inferred_to_become_async.zig +++ /dev/null @@ -1,15 +0,0 @@ -export fn a() void { - var non_async_fn: fn () void = undefined; - non_async_fn = func; -} -fn func() void { - suspend {} -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:5:1: error: 'func' cannot be async -// tmp.zig:3:20: note: required to be non-async here -// tmp.zig:6:5: note: suspends here diff --git a/test/cases/compile_errors/async/non_async_function_pointer_passed_to_asyncCall.zig b/test/cases/compile_errors/async/non_async_function_pointer_passed_to_asyncCall.zig deleted file mode 100644 index b62524f6de..0000000000 --- 
a/test/cases/compile_errors/async/non_async_function_pointer_passed_to_asyncCall.zig +++ /dev/null @@ -1,13 +0,0 @@ -export fn entry() void { - var ptr = afunc; - var bytes: [100]u8 align(16) = undefined; - _ = @asyncCall(&bytes, {}, ptr, .{}); - _ = &ptr; -} -fn afunc() void {} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:4:32: error: expected async function, found 'fn () void' diff --git a/test/cases/compile_errors/async/prevent_bad_implicit_casting_of_anyframe_types.zig b/test/cases/compile_errors/async/prevent_bad_implicit_casting_of_anyframe_types.zig deleted file mode 100644 index 6ab99bf00d..0000000000 --- a/test/cases/compile_errors/async/prevent_bad_implicit_casting_of_anyframe_types.zig +++ /dev/null @@ -1,24 +0,0 @@ -export fn a() void { - var x: anyframe = undefined; - var y: anyframe->i32 = x; - _ = .{ &x, &y }; -} -export fn b() void { - var x: i32 = undefined; - var y: anyframe->i32 = x; - _ = .{ &x, &y }; -} -export fn c() void { - var x: @Frame(func) = undefined; - var y: anyframe->i32 = &x; - _ = .{ &x, &y }; -} -fn func() void {} - -// error -// backend=stage1 -// target=native -// -// :3:28: error: expected type 'anyframe->i32', found 'anyframe' -// :8:28: error: expected type 'anyframe->i32', found 'i32' -// tmp.zig:13:29: error: expected type 'anyframe->i32', found '*@Frame(func)' diff --git a/test/cases/compile_errors/async/wrong_type_for_argument_tuple_to_asyncCall.zig b/test/cases/compile_errors/async/wrong_type_for_argument_tuple_to_asyncCall.zig deleted file mode 100644 index 7a9be0a8cc..0000000000 --- a/test/cases/compile_errors/async/wrong_type_for_argument_tuple_to_asyncCall.zig +++ /dev/null @@ -1,14 +0,0 @@ -export fn entry1() void { - var frame: @Frame(foo) = undefined; - @asyncCall(&frame, {}, foo, {}); -} - -fn foo() i32 { - return 0; -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:3:33: error: expected tuple or struct, found 'void' diff --git a/test/cases/safety/nosuspend function call, 
callee suspends.zig b/test/cases/safety/nosuspend function call, callee suspends.zig deleted file mode 100644 index 50f457f314..0000000000 --- a/test/cases/safety/nosuspend function call, callee suspends.zig +++ /dev/null @@ -1,20 +0,0 @@ -const std = @import("std"); - -pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace, _: ?usize) noreturn { - _ = message; - _ = stack_trace; - std.process.exit(0); -} -pub fn main() !void { - _ = nosuspend add(101, 100); - return error.TestFailed; -} -fn add(a: i32, b: i32) i32 { - if (a > 100) { - suspend {} - } - return a + b; -} -// run -// backend=stage1 -// target=native