From fbdc5154184b0752175dcafc5bfdc4ea6a0cfebf Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 19 Jun 2021 10:45:56 +0200 Subject: [PATCH] link: add basic TAPI parser for linkers Parser uses kubkon/zig-yaml gitrev c3eae1e40a02aedd44ad1171e5c8b259896cbda0 --- CMakeLists.txt | 5 + src/link/tapi.zig | 67 ++++ src/link/tapi/Tokenizer.zig | 439 ++++++++++++++++++++++ src/link/tapi/parse.zig | 705 +++++++++++++++++++++++++++++++++++ src/link/tapi/parse/test.zig | 556 +++++++++++++++++++++++++++ src/link/tapi/yaml.zig | 652 ++++++++++++++++++++++++++++++++ 6 files changed, 2424 insertions(+) create mode 100644 src/link/tapi.zig create mode 100644 src/link/tapi/Tokenizer.zig create mode 100644 src/link/tapi/parse.zig create mode 100644 src/link/tapi/parse/test.zig create mode 100644 src/link/tapi/yaml.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index bc946e2a34..f279990e27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -588,6 +588,11 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/aarch64.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/x86_64.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" + "${CMAKE_SOURCE_DIR}/src/link/tapi.zig" + "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig" + "${CMAKE_SOURCE_DIR}/src/link/tapi/parse/test.zig" + "${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig" + "${CMAKE_SOURCE_DIR}/src/link/tapi/yaml.zig" "${CMAKE_SOURCE_DIR}/src/link/C/zig.h" "${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin" "${CMAKE_SOURCE_DIR}/src/liveness.zig" diff --git a/src/link/tapi.zig b/src/link/tapi.zig new file mode 100644 index 0000000000..447485950e --- /dev/null +++ b/src/link/tapi.zig @@ -0,0 +1,67 @@ +const std = @import("std"); +const fs = std.fs; +const mem = std.mem; +const log = std.log.scoped(.tapi); + +const Allocator = mem.Allocator; +const Yaml = @import("tapi/yaml.zig").Yaml; + +pub const LibStub = struct { + /// Underlying memory for stub's contents. + yaml: Yaml, + + /// Typed contents of the tbd file. 
+ inner: []Tbd, + + const Tbd = struct { + tbd_version: u3, + targets: []const []const u8, + uuids: []const struct { + target: []const u8, + value: []const u8, + }, + install_name: []const u8, + current_version: union(enum) { + string: []const u8, + int: u32, + }, + reexported_libraries: ?[]const struct { + targets: []const []const u8, + libraries: []const []const u8, + }, + parent_umbrella: ?[]const struct { + targets: []const []const u8, + umbrella: []const u8, + }, + exports: []const struct { + targets: []const []const u8, + symbols: []const []const u8, + }, + allowable_clients: ?[]const struct { + targets: []const []const u8, + clients: []const []const u8, + }, + objc_classes: ?[]const []const u8, + }; + + pub fn loadFromFile(allocator: *Allocator, file_path: []const u8) !LibStub { + const file = try fs.cwd().openFile(file_path, .{}); + defer file.close(); + + const source = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); + defer allocator.free(source); + + var lib_stub = LibStub{ + .yaml = try Yaml.load(allocator, source), + .inner = undefined, + }; + + lib_stub.inner = try lib_stub.yaml.parse([]Tbd); + + return lib_stub; + } + + pub fn deinit(self: *LibStub) void { + self.yaml.deinit(); + } +}; diff --git a/src/link/tapi/Tokenizer.zig b/src/link/tapi/Tokenizer.zig new file mode 100644 index 0000000000..37fcedbfce --- /dev/null +++ b/src/link/tapi/Tokenizer.zig @@ -0,0 +1,439 @@ +const Tokenizer = @This(); + +const std = @import("std"); +const log = std.log.scoped(.tapi); +const testing = std.testing; + +buffer: []const u8, +index: usize = 0, + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + // Count of spaces/tabs. + // Only active for .Space and .Tab tokens. + count: ?usize = null, + + pub const Id = enum { + Eof, + + NewLine, + DocStart, // --- + DocEnd, // ... 
+ SeqItemInd, // - + MapValueInd, // : + FlowMapStart, // { + FlowMapEnd, // } + FlowSeqStart, // [ + FlowSeqEnd, // ] + + Comma, + Space, + Tab, + Comment, // # + Alias, // * + Anchor, // & + Tag, // ! + SingleQuote, // ' + DoubleQuote, // " + + Literal, + }; +}; + +pub const TokenIndex = usize; + +pub const TokenIterator = struct { + buffer: []const Token, + pos: TokenIndex = 0, + + pub fn next(self: *TokenIterator) Token { + const token = self.buffer[self.pos]; + self.pos += 1; + return token; + } + + pub fn peek(self: TokenIterator) ?Token { + if (self.pos >= self.buffer.len) return null; + return self.buffer[self.pos]; + } + + pub fn reset(self: *TokenIterator) void { + self.pos = 0; + } + + pub fn seekTo(self: *TokenIterator, pos: TokenIndex) void { + self.pos = pos; + } + + pub fn seekBy(self: *TokenIterator, offset: isize) void { + const new_pos = @bitCast(isize, self.pos) + offset; + if (new_pos < 0) { + self.pos = 0; + } else { + self.pos = @intCast(usize, new_pos); + } + } +}; + +pub fn next(self: *Tokenizer) Token { + var result = Token{ + .id = .Eof, + .start = self.index, + .end = undefined, + }; + + var state: union(enum) { + Start, + NewLine, + Space: usize, + Tab: usize, + Hyphen: usize, + Dot: usize, + Literal, + } = .Start; + + while (self.index < self.buffer.len) : (self.index += 1) { + const c = self.buffer[self.index]; + switch (state) { + .Start => switch (c) { + ' ' => { + state = .{ .Space = 1 }; + }, + '\t' => { + state = .{ .Tab = 1 }; + }, + '\n' => { + result.id = .NewLine; + self.index += 1; + break; + }, + '\r' => { + state = .NewLine; + }, + '-' => { + state = .{ .Hyphen = 1 }; + }, + '.' => { + state = .{ .Dot = 1 }; + }, + ',' => { + result.id = .Comma; + self.index += 1; + break; + }, + '#' => { + result.id = .Comment; + self.index += 1; + break; + }, + '*' => { + result.id = .Alias; + self.index += 1; + break; + }, + '&' => { + result.id = .Anchor; + self.index += 1; + break; + }, + '!' 
=> { + result.id = .Tag; + self.index += 1; + break; + }, + '\'' => { + result.id = .SingleQuote; + self.index += 1; + break; + }, + '"' => { + result.id = .DoubleQuote; + self.index += 1; + break; + }, + '[' => { + result.id = .FlowSeqStart; + self.index += 1; + break; + }, + ']' => { + result.id = .FlowSeqEnd; + self.index += 1; + break; + }, + ':' => { + result.id = .MapValueInd; + self.index += 1; + break; + }, + '{' => { + result.id = .FlowMapStart; + self.index += 1; + break; + }, + '}' => { + result.id = .FlowMapEnd; + self.index += 1; + break; + }, + else => { + state = .Literal; + }, + }, + .Space => |*count| switch (c) { + ' ' => { + count.* += 1; + }, + else => { + result.id = .Space; + result.count = count.*; + break; + }, + }, + .Tab => |*count| switch (c) { + ' ' => { + count.* += 1; + }, + else => { + result.id = .Tab; + result.count = count.*; + break; + }, + }, + .NewLine => switch (c) { + '\n' => { + result.id = .NewLine; + self.index += 1; + break; + }, + else => {}, // TODO this should be an error condition + }, + .Hyphen => |*count| switch (c) { + ' ' => { + result.id = .SeqItemInd; + self.index += 1; + break; + }, + '-' => { + count.* += 1; + + if (count.* == 3) { + result.id = .DocStart; + self.index += 1; + break; + } + }, + else => { + state = .Literal; + }, + }, + .Dot => |*count| switch (c) { + '.' 
=> { + count.* += 1; + + if (count.* == 3) { + result.id = .DocEnd; + self.index += 1; + break; + } + }, + else => { + state = .Literal; + }, + }, + .Literal => switch (c) { + '\r', '\n', ' ', '\'', '"', ',', ':', ']', '}' => { + result.id = .Literal; + break; + }, + else => { + result.id = .Literal; + }, + }, + } + } + + if (state == .Literal and result.id == .Eof) { + result.id = .Literal; + } + + result.end = self.index; + + log.debug("{any}", .{result}); + log.debug(" | {s}", .{self.buffer[result.start..result.end]}); + + return result; +} + +fn testExpected(source: []const u8, expected: []const Token.Id) !void { + var tokenizer = Tokenizer{ + .buffer = source, + }; + + for (expected) |exp| { + const token = tokenizer.next(); + try testing.expectEqual(exp, token.id); + } +} + +test "empty doc" { + try testExpected("", &[_]Token.Id{.Eof}); +} + +test "empty doc with explicit markers" { + try testExpected( + \\--- + \\... + , &[_]Token.Id{ + .DocStart, .NewLine, .DocEnd, .Eof, + }); +} + +test "sequence of values" { + try testExpected( + \\- 0 + \\- 1 + \\- 2 + , &[_]Token.Id{ + .SeqItemInd, + .Literal, + .NewLine, + .SeqItemInd, + .Literal, + .NewLine, + .SeqItemInd, + .Literal, + .Eof, + }); +} + +test "sequence of sequences" { + try testExpected( + \\- [ val1, val2] + \\- [val3, val4 ] + , &[_]Token.Id{ + .SeqItemInd, + .FlowSeqStart, + .Space, + .Literal, + .Comma, + .Space, + .Literal, + .FlowSeqEnd, + .NewLine, + .SeqItemInd, + .FlowSeqStart, + .Literal, + .Comma, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .Eof, + }); +} + +test "mappings" { + try testExpected( + \\key1: value1 + \\key2: value2 + , &[_]Token.Id{ + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .Literal, + .Eof, + }); +} + +test "inline mapped sequence of values" { + try testExpected( + \\key : [ val1, + \\ val2 ] + , &[_]Token.Id{ + .Literal, + .Space, + .MapValueInd, + .Space, + .FlowSeqStart, + .Space, + .Literal, + .Comma, + 
.Space, + .NewLine, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .Eof, + }); +} + +test "part of tdb" { + try testExpected( + \\--- !tapi-tbd + \\tbd-version: 4 + \\targets: [ x86_64-macos ] + \\ + \\uuids: + \\ - target: x86_64-macos + \\ value: F86CC732-D5E4-30B5-AA7D-167DF5EC2708 + \\ + \\install-name: '/usr/lib/libSystem.B.dylib' + \\... + , &[_]Token.Id{ + .DocStart, + .Space, + .Tag, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .FlowSeqStart, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .NewLine, + .NewLine, + .Literal, + .MapValueInd, + .NewLine, + .Space, + .SeqItemInd, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Space, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .SingleQuote, + .Literal, + .SingleQuote, + .NewLine, + .DocEnd, + .Eof, + }); +} diff --git a/src/link/tapi/parse.zig b/src/link/tapi/parse.zig new file mode 100644 index 0000000000..811ac27c68 --- /dev/null +++ b/src/link/tapi/parse.zig @@ -0,0 +1,705 @@ +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.tapi); +const mem = std.mem; +const testing = std.testing; + +const Allocator = mem.Allocator; +const Tokenizer = @import("Tokenizer.zig"); +const Token = Tokenizer.Token; +const TokenIndex = Tokenizer.TokenIndex; +const TokenIterator = Tokenizer.TokenIterator; + +pub const ParseError = error{ + MalformedYaml, + NestedDocuments, + UnexpectedTag, + UnexpectedEof, + UnexpectedToken, + Unhandled, +} || Allocator.Error; + +pub const Node = struct { + tag: Tag, + tree: *const Tree, + + pub const Tag = enum { + doc, + map, + list, + value, + }; + + pub fn cast(self: *const Node, comptime T: type) ?*const T { + if (self.tag != T.base_tag) { + return null; + } + return @fieldParentPtr(T, "base", self); + } + + pub fn deinit(self: *Node, allocator: *Allocator) void { + switch (self.tag) { 
+ .doc => @fieldParentPtr(Node.Doc, "base", self).deinit(allocator), + .map => @fieldParentPtr(Node.Map, "base", self).deinit(allocator), + .list => @fieldParentPtr(Node.List, "base", self).deinit(allocator), + .value => @fieldParentPtr(Node.Value, "base", self).deinit(allocator), + } + } + + pub fn format( + self: *const Node, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + return switch (self.tag) { + .doc => @fieldParentPtr(Node.Doc, "base", self).format(fmt, options, writer), + .map => @fieldParentPtr(Node.Map, "base", self).format(fmt, options, writer), + .list => @fieldParentPtr(Node.List, "base", self).format(fmt, options, writer), + .value => @fieldParentPtr(Node.Value, "base", self).format(fmt, options, writer), + }; + } + + pub const Doc = struct { + base: Node = Node{ .tag = Tag.doc, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + directive: ?TokenIndex = null, + value: ?*Node = null, + + pub const base_tag: Node.Tag = .doc; + + pub fn deinit(self: *Doc, allocator: *Allocator) void { + if (self.value) |node| { + node.deinit(allocator); + allocator.destroy(node); + } + } + + pub fn format( + self: *const Doc, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + if (self.directive) |id| { + try std.fmt.format(writer, "{{ ", .{}); + const directive = self.base.tree.tokens[id]; + try std.fmt.format(writer, ".directive = {s}, ", .{ + self.base.tree.source[directive.start..directive.end], + }); + } + if (self.value) |node| { + try std.fmt.format(writer, "{}", .{node}); + } + if (self.directive != null) { + try std.fmt.format(writer, " }}", .{}); + } + } + }; + + pub const Map = struct { + base: Node = Node{ .tag = Tag.map, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + values: std.ArrayListUnmanaged(Entry) = .{}, + + pub const base_tag: Node.Tag = .map; + + pub const Entry = struct { + key: TokenIndex, + value: 
*Node, + }; + + pub fn deinit(self: *Map, allocator: *Allocator) void { + for (self.values.items) |entry| { + entry.value.deinit(allocator); + allocator.destroy(entry.value); + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const Map, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + try std.fmt.format(writer, "{{ ", .{}); + for (self.values.items) |entry| { + const key = self.base.tree.tokens[entry.key]; + try std.fmt.format(writer, "{s} => {}, ", .{ + self.base.tree.source[key.start..key.end], + entry.value, + }); + } + return std.fmt.format(writer, " }}", .{}); + } + }; + + pub const List = struct { + base: Node = Node{ .tag = Tag.list, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + values: std.ArrayListUnmanaged(*Node) = .{}, + + pub const base_tag: Node.Tag = .list; + + pub fn deinit(self: *List, allocator: *Allocator) void { + for (self.values.items) |node| { + node.deinit(allocator); + allocator.destroy(node); + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const List, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + try std.fmt.format(writer, "[ ", .{}); + for (self.values.items) |node| { + try std.fmt.format(writer, "{}, ", .{node}); + } + return std.fmt.format(writer, " ]", .{}); + } + }; + + pub const Value = struct { + base: Node = Node{ .tag = Tag.value, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + + pub const base_tag: Node.Tag = .value; + + pub fn deinit(self: *Value, allocator: *Allocator) void {} + + pub fn format( + self: *const Value, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + const start = self.base.tree.tokens[self.start.?]; + const end = self.base.tree.tokens[self.end.?]; + return std.fmt.format(writer, "{s}", .{ + self.base.tree.source[start.start..end.end], + }); + } + }; +}; + +pub const Tree = 
struct { + allocator: *Allocator, + source: []const u8, + tokens: []Token, + docs: std.ArrayListUnmanaged(*Node) = .{}, + + pub fn init(allocator: *Allocator) Tree { + return .{ + .allocator = allocator, + .source = undefined, + .tokens = undefined, + }; + } + + pub fn deinit(self: *Tree) void { + self.allocator.free(self.tokens); + for (self.docs.items) |doc| { + doc.deinit(self.allocator); + self.allocator.destroy(doc); + } + self.docs.deinit(self.allocator); + } + + pub fn parse(self: *Tree, source: []const u8) !void { + var tokenizer = Tokenizer{ .buffer = source }; + var tokens = std.ArrayList(Token).init(self.allocator); + errdefer tokens.deinit(); + + while (true) { + const token = tokenizer.next(); + try tokens.append(token); + if (token.id == .Eof) break; + } + + self.source = source; + self.tokens = tokens.toOwnedSlice(); + + var it = TokenIterator{ .buffer = self.tokens }; + var parser = Parser{ + .allocator = self.allocator, + .tree = self, + .token_it = &it, + }; + defer parser.deinit(); + + try parser.scopes.append(self.allocator, .{ + .indent = 0, + }); + + while (true) { + if (parser.token_it.peek() == null) return; + const pos = parser.token_it.pos; + const token = parser.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + switch (token.id) { + .Space, .Comment, .NewLine => {}, + .Eof => break, + else => { + const doc = try parser.doc(pos); + try self.docs.append(self.allocator, &doc.base); + }, + } + } + } +}; + +const Parser = struct { + allocator: *Allocator, + tree: *Tree, + token_it: *TokenIterator, + scopes: std.ArrayListUnmanaged(Scope) = .{}, + + const Scope = struct { + indent: usize, + }; + + fn deinit(self: *Parser) void { + self.scopes.deinit(self.allocator); + } + + fn doc(self: *Parser, start: TokenIndex) ParseError!*Node.Doc { + const node = try self.allocator.create(Node.Doc); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + 
self.token_it.seekTo(start); + + log.debug("Doc start: {}, {}", .{ start, self.tree.tokens[start] }); + + const explicit_doc: bool = if (self.eatToken(.DocStart)) |_| explicit_doc: { + if (self.eatToken(.Tag)) |_| { + node.directive = try self.expectToken(.Literal); + } + _ = try self.expectToken(.NewLine); + break :explicit_doc true; + } else false; + + while (true) { + const pos = self.token_it.pos; + const token = self.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + switch (token.id) { + .Tag => { + return error.UnexpectedTag; + }, + .Literal, .SingleQuote, .DoubleQuote => { + _ = try self.expectToken(.MapValueInd); + const map_node = try self.map(pos); + node.value = &map_node.base; + }, + .SeqItemInd => { + const list_node = try self.list(pos); + node.value = &list_node.base; + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + node.value = &list_node.base; + }, + .DocEnd => { + if (explicit_doc) break; + return error.UnexpectedToken; + }, + .DocStart, .Eof => { + self.token_it.seekBy(-1); + break; + }, + else => { + return error.UnexpectedToken; + }, + } + } + + node.end = self.token_it.pos - 1; + + log.debug("Doc end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn map(self: *Parser, start: TokenIndex) ParseError!*Node.Map { + const node = try self.allocator.create(Node.Map); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Map start: {}, {}", .{ start, self.tree.tokens[start] }); + log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]}); + + while (true) { + // Parse key. 
+ const key_pos = self.token_it.pos; + const key = self.token_it.next(); + switch (key.id) { + .Literal => {}, + else => { + self.token_it.seekBy(-1); + break; + }, + } + + log.debug("Map key: {}, '{s}'", .{ key, self.tree.source[key.start..key.end] }); + + // Separator + _ = try self.expectToken(.MapValueInd); + self.eatCommentsAndSpace(); + + // Parse value. + const value: *Node = value: { + if (self.eatToken(.NewLine)) |_| { + // Explicit, complex value such as list or map. + try self.openScope(); + const value_pos = self.token_it.pos; + const value = self.token_it.next(); + switch (value.id) { + .Literal, .SingleQuote, .DoubleQuote => { + // Assume nested map. + const map_node = try self.map(value_pos); + break :value &map_node.base; + }, + .SeqItemInd => { + // Assume list of values. + const list_node = try self.list(value_pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{key}); + return error.Unhandled; + }, + } + } else { + const value_pos = self.token_it.pos; + const value = self.token_it.next(); + switch (value.id) { + .Literal, .SingleQuote, .DoubleQuote => { + // Assume leaf value. + const leaf_node = try self.leaf_value(value_pos); + break :value &leaf_node.base; + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(value_pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{key}); + return error.Unhandled; + }, + } + } + }; + log.debug("Map value: {}", .{value}); + + try node.values.append(self.allocator, .{ + .key = key_pos, + .value = value, + }); + + if (self.eatToken(.NewLine)) |_| { + if (try self.closeScope()) { + break; + } + } + } + + node.end = self.token_it.pos - 1; + + log.debug("Map end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] 
}); + + return node; + } + + fn list(self: *Parser, start: TokenIndex) ParseError!*Node.List { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] }); + log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]}); + + while (true) { + _ = self.eatToken(.SeqItemInd) orelse { + _ = try self.closeScope(); + break; + }; + self.eatCommentsAndSpace(); + + const pos = self.token_it.pos; + const token = self.token_it.next(); + const value: *Node = value: { + switch (token.id) { + .Literal, .SingleQuote, .DoubleQuote => { + if (self.eatToken(.MapValueInd)) |_| { + if (self.eatToken(.NewLine)) |_| { + try self.openScope(); + } + // nested map + const map_node = try self.map(pos); + break :value &map_node.base; + } else { + // standalone (leaf) value + const leaf_node = try self.leaf_value(pos); + break :value &leaf_node.base; + } + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{token}); + return error.Unhandled; + }, + } + }; + try node.values.append(self.allocator, value); + + _ = self.eatToken(.NewLine); + } + + node.end = self.token_it.pos - 1; + + log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] 
}); + + return node; + } + + fn list_bracketed(self: *Parser, start: TokenIndex) ParseError!*Node.List { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] }); + log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]}); + + _ = try self.expectToken(.FlowSeqStart); + + while (true) { + _ = self.eatToken(.NewLine); + self.eatCommentsAndSpace(); + + const pos = self.token_it.pos; + const token = self.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + const value: *Node = value: { + switch (token.id) { + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + break :value &list_node.base; + }, + .FlowSeqEnd => { + break; + }, + .Literal, .SingleQuote, .DoubleQuote => { + const leaf_node = try self.leaf_value(pos); + _ = self.eatToken(.Comma); + // TODO newline + break :value &leaf_node.base; + }, + else => { + log.err("{}", .{token}); + return error.Unhandled; + }, + } + }; + try node.values.append(self.allocator, value); + } + + node.end = self.token_it.pos - 1; + + log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn leaf_value(self: *Parser, start: TokenIndex) ParseError!*Node.Value { + const node = try self.allocator.create(Node.Value); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Leaf start: {}, {}", .{ node.start.?, self.tree.tokens[node.start.?] }); + + parse: { + if (self.eatToken(.SingleQuote)) |_| { + node.start = node.start.? 
+ 1; + while (true) { + const pos = self.token_it.pos; + const tok = self.token_it.next(); + switch (tok.id) { + .SingleQuote => { + node.end = self.token_it.pos - 2; + break :parse; + }, + .NewLine => return error.UnexpectedToken, + else => {}, + } + } + } + + if (self.eatToken(.DoubleQuote)) |_| { + node.start = node.start.? + 1; + while (true) { + const pos = self.token_it.pos; + const tok = self.token_it.next(); + switch (tok.id) { + .DoubleQuote => { + node.end = self.token_it.pos - 2; + break :parse; + }, + .NewLine => return error.UnexpectedToken, + else => {}, + } + } + } + + // TODO handle multiline strings in new block scope + while (true) { + const pos = self.token_it.pos; + const tok = self.token_it.next(); + switch (tok.id) { + .Literal => {}, + .Space => { + const trailing = self.token_it.pos - 2; + self.eatCommentsAndSpace(); + if (self.token_it.peek()) |peek| { + if (peek.id != .Literal) { + node.end = trailing; + break; + } + } + }, + else => { + self.token_it.seekBy(-1); + node.end = self.token_it.pos - 1; + break; + }, + } + } + } + + log.debug("Leaf end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn openScope(self: *Parser) !void { + const peek = self.token_it.peek() orelse return error.UnexpectedEof; + if (peek.id != .Space and peek.id != .Tab) { + // No need to open scope. 
+ return; + } + const indent = self.token_it.next().count.?; + const prev_scope = self.scopes.items[self.scopes.items.len - 1]; + if (indent < prev_scope.indent) { + return error.MalformedYaml; + } + + log.debug("Opening scope...", .{}); + + try self.scopes.append(self.allocator, .{ + .indent = indent, + }); + } + + fn closeScope(self: *Parser) !bool { + const indent = indent: { + const peek = self.token_it.peek() orelse return error.UnexpectedEof; + switch (peek.id) { + .Space, .Tab => { + break :indent self.token_it.next().count.?; + }, + else => { + break :indent 0; + }, + } + }; + + const scope = self.scopes.items[self.scopes.items.len - 1]; + if (indent < scope.indent) { + log.debug("Closing scope...", .{}); + _ = self.scopes.pop(); + return true; + } + + return false; + } + + fn eatCommentsAndSpace(self: *Parser) void { + while (true) { + _ = self.token_it.peek() orelse return; + const token = self.token_it.next(); + switch (token.id) { + .Comment, .Space => {}, + else => { + self.token_it.seekBy(-1); + break; + }, + } + } + } + + fn eatToken(self: *Parser, id: Token.Id) ?TokenIndex { + while (true) { + const pos = self.token_it.pos; + _ = self.token_it.peek() orelse return null; + const token = self.token_it.next(); + switch (token.id) { + .Comment, .Space => continue, + else => |next_id| if (next_id == id) { + return pos; + } else { + self.token_it.seekTo(pos); + return null; + }, + } + } + } + + fn expectToken(self: *Parser, id: Token.Id) ParseError!TokenIndex { + return self.eatToken(id) orelse error.UnexpectedToken; + } +}; + +test { + _ = @import("parse/test.zig"); +} diff --git a/src/link/tapi/parse/test.zig b/src/link/tapi/parse/test.zig new file mode 100644 index 0000000000..b96a71fe97 --- /dev/null +++ b/src/link/tapi/parse/test.zig @@ -0,0 +1,556 @@ +const std = @import("std"); +const mem = std.mem; +const testing = std.testing; + +usingnamespace @import("../parse.zig"); + +test "explicit doc" { + const source = + \\--- !tapi-tbd + \\tbd-version: 4 
+ \\abc-version: 5 + \\... + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + const directive = tree.tokens[doc.directive.?]; + try testing.expectEqual(directive.id, .Literal); + try testing.expect(mem.eql(u8, "tapi-tbd", tree.source[directive.start..directive.end])); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 5); + try testing.expectEqual(map.end.?, 14); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "tbd-version", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql(u8, "4", tree.source[value_tok.start..value_tok.end])); + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "abc-version", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql(u8, "5", tree.source[value_tok.start..value_tok.end])); + } +} + +test "leaf in quotes" { + const source = + \\key1: no quotes + \\key2: 'single quoted' + \\key3: "double quoted" + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = 
tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 3); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1", + tree.source[key.start..key.end], + )); + + const value = entry.value.cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expectEqual(start.id, .Literal); + try testing.expectEqual(end.id, .Literal); + try testing.expect(mem.eql( + u8, + "no quotes", + tree.source[start.start..end.end], + )); + } +} + +test "nested maps" { + const source = + \\key1: + \\ key1_1 : value1_1 + \\ key1_2 : value1_2 + \\key2 : value2 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key1", tree.source[key.start..key.end])); + + const 
nested_map = entry.value.cast(Node.Map).?; + try testing.expectEqual(nested_map.start.?, 4); + try testing.expectEqual(nested_map.end.?, 16); + try testing.expectEqual(nested_map.values.items.len, 2); + + { + const nested_entry = nested_map.values.items[0]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1_1", + tree.source[nested_key.start..nested_key.end], + )); + + const nested_value = nested_entry.value.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.start.?]; + try testing.expectEqual(nested_value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value1_1", + tree.source[nested_value_tok.start..nested_value_tok.end], + )); + } + + { + const nested_entry = nested_map.values.items[1]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1_2", + tree.source[nested_key.start..nested_key.end], + )); + + const nested_value = nested_entry.value.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.start.?]; + try testing.expectEqual(nested_value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value1_2", + tree.source[nested_value_tok.start..nested_value_tok.end], + )); + } + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key2", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value2", + tree.source[value_tok.start..value_tok.end], + )); + } +} + +test "map of list of values" { + const source = + \\ints: + \\ - 0 + \\ - 1 + \\ - 2 + ; + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try 
testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "ints", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.List).?; + try testing.expectEqual(value.start.?, 4); + try testing.expectEqual(value.end.?, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = value.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "0", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "1", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "2", tree.source[leaf.start..leaf.end])); + } +} + +test "map of list of maps" { + const source = + \\key1: + \\- key2 : value2 + \\- key3 : value3 + \\- key4 : value4 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, 
tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key1", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.List).?; + try testing.expectEqual(value.start.?, 3); + try testing.expectEqual(value.end.?, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = value.values.items[0].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key2", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value2", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[1].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key3", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value3", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[2].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key4", 
tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value4", tree.source[leaf.start..leaf.end])); + } +} + +test "list of lists" { + const source = + \\- [name , hr, avg ] + \\- [Mark McGwire , 65, 0.278] + \\- [Sammy Sosa , 63, 0.288] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.start.?, 0); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .list); + const nested = list.values.items[0].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } + } + + { + try 
testing.expectEqual(list.values.items[1].tag, .list); + const nested = list.values.items[1].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expect(mem.eql(u8, "Mark McGwire", tree.source[start.start..end.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "65", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "0.278", tree.source[leaf.start..leaf.end])); + } + } + + { + try testing.expectEqual(list.values.items[2].tag, .list); + const nested = list.values.items[2].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expect(mem.eql(u8, "Sammy Sosa", tree.source[start.start..end.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "63", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "0.288", tree.source[leaf.start..leaf.end])); + } + } +} + +test "inline list" { + const 
source = + \\[name , hr, avg ] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.start.?, 0); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } +} + +test "inline list as mapping value" { + const source = + \\key : [ + \\ name , + \\ hr, avg ] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try 
testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key", tree.source[key.start..key.end])); + + const list = entry.value.cast(Node.List).?; + try testing.expectEqual(list.start.?, 4); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } +} diff --git a/src/link/tapi/yaml.zig b/src/link/tapi/yaml.zig new file mode 100644 index 0000000000..42f5d44e76 --- /dev/null +++ b/src/link/tapi/yaml.zig @@ -0,0 +1,652 @@ +const std = @import("std"); +const assert = std.debug.assert; +const math = std.math; +const mem = std.mem; +const testing = std.testing; +const log = std.log.scoped(.tapi); + +const Allocator = mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; + +pub const Tokenizer = @import("Tokenizer.zig"); +pub const parse = @import("parse.zig"); + +const Node = parse.Node; +const Tree = parse.Tree; +const ParseError = parse.ParseError; + +pub const YamlError = error{ + UnexpectedNodeType, + OutOfMemory, +} || ParseError || std.fmt.ParseIntError; + +pub const 
ValueType = enum {
    empty,
    int,
    float,
    string,
    list,
    map,
};

/// A YAML sequence converted into a slice of `Value`s.
pub const List = []Value;
/// A YAML mapping converted into an insertion-ordered string-keyed map.
pub const Map = std.StringArrayHashMap(Value);

/// Untyped representation of a YAML node, tagged by `ValueType`.
pub const Value = union(ValueType) {
    empty,
    int: i64,
    float: f64,
    string: []const u8,
    list: List,
    map: Map,

    /// Returns the integer payload, or `error.TypeMismatch` if this is not an `.int`.
    pub fn asInt(self: Value) !i64 {
        if (self != .int) return error.TypeMismatch;
        return self.int;
    }

    /// Returns the float payload, or `error.TypeMismatch` if this is not a `.float`.
    pub fn asFloat(self: Value) !f64 {
        if (self != .float) return error.TypeMismatch;
        return self.float;
    }

    /// Returns the string payload, or `error.TypeMismatch` if this is not a `.string`.
    pub fn asString(self: Value) ![]const u8 {
        if (self != .string) return error.TypeMismatch;
        return self.string;
    }

    /// Returns the list payload, or `error.TypeMismatch` if this is not a `.list`.
    pub fn asList(self: Value) !List {
        if (self != .list) return error.TypeMismatch;
        return self.list;
    }

    /// Returns the map payload, or `error.TypeMismatch` if this is not a `.map`.
    pub fn asMap(self: Value) !Map {
        if (self != .map) return error.TypeMismatch;
        return self.map;
    }

    /// Layout state threaded through recursive `stringify` calls.
    const StringifyArgs = struct {
        /// Number of spaces written before each line of the current level.
        indentation: usize = 0,
        /// When set, the first key of a map is written without indentation
        /// (it follows a "- " list marker on the same line).
        should_inline_first_key: bool = false,
    };

    pub const StringifyError = std.os.WriteError;

    /// Writes this value to `writer` in YAML-like text form.
    /// Lists whose first element is a list or map are written one element per
    /// line with a "- " marker; other lists are written inline as "[ a, b ]".
    /// Empty values, empty lists and empty maps produce no output.
    pub fn stringify(self: Value, writer: anytype, args: StringifyArgs) StringifyError!void {
        switch (self) {
            .empty => return,
            .int => |int| return writer.print("{}", .{int}),
            .float => |float| return writer.print("{d}", .{float}),
            .string => |string| return writer.print("{s}", .{string}),
            .list => |list| {
                const len = list.len;
                if (len == 0) return;

                // The first element alone decides between block and inline form.
                const first = list[0];
                if (first.is_compound()) {
                    for (list) |elem, i| {
                        try writer.writeByteNTimes(' ', args.indentation);
                        try writer.writeAll("- ");
                        try elem.stringify(writer, .{
                            .indentation = args.indentation + 2,
                            .should_inline_first_key = true,
                        });
                        if (i < len - 1) {
                            try writer.writeByte('\n');
                        }
                    }
                    return;
                }

                try writer.writeAll("[ ");
                for (list) |elem, i| {
                    try elem.stringify(writer, args);
                    if (i < len - 1) {
                        try writer.writeAll(", ");
                    }
                }
                try writer.writeAll(" ]");
            },
            .map => |map| {
                const keys = map.keys();
                const len = keys.len;
                if (len == 0) return;

                for (keys) |key, i| {
                    if (!args.should_inline_first_key or i != 0) {
                        try writer.writeByteNTimes(' ', args.indentation);
                    }
                    try writer.print("{s}: ", .{key});

                    const value = map.get(key) orelse unreachable;
                    // Scalars and inline-style lists stay on the key's line;
                    // everything else starts on a new, deeper-indented line.
                    const should_inline = blk: {
                        if (!value.is_compound()) break :blk true;
                        if (value == .list and value.list.len > 0 and !value.list[0].is_compound()) break :blk true;
                        break :blk false;
                    };

                    if (should_inline) {
                        try value.stringify(writer, args);
                    } else {
                        try writer.writeByte('\n');
                        try value.stringify(writer, .{
                            .indentation = args.indentation + 4,
                        });
                    }

                    if (i < len - 1) {
                        try writer.writeByte('\n');
                    }
                }
            },
        }
    }

    /// True for the container variants (`.list` and `.map`).
    fn is_compound(self: Value) bool {
        return switch (self) {
            .list, .map => true,
            else => false,
        };
    }

    /// Converts a parse-tree `node` into a `Value`, allocating from `arena`.
    /// `type_hint`, when given, forces how a scalar node is interpreted; it is
    /// only consulted for `Node.Value` nodes and must be `.int`, `.float` or
    /// `.string`.
    /// Returns `error.UnexpectedNodeType` for node kinds not handled here, and
    /// propagates number-parsing errors when a hinted scalar does not parse.
    fn fromNode(arena: *Allocator, tree: *const Tree, node: *const Node, type_hint: ?ValueType) YamlError!Value {
        if (node.cast(Node.Doc)) |doc| {
            const inner = doc.value orelse {
                // empty doc
                return Value{ .empty = .{} };
            };
            return Value.fromNode(arena, tree, inner, null);
        } else if (node.cast(Node.Map)) |map| {
            var out_map = std.StringArrayHashMap(Value).init(arena);
            try out_map.ensureUnusedCapacity(map.values.items.len);

            for (map.values.items) |entry| {
                const key_tok = tree.tokens[entry.key];
                const key = try arena.dupe(u8, tree.source[key_tok.start..key_tok.end]);
                const value = try Value.fromNode(arena, tree, entry.value, null);

                // A repeated key in the input overwrites the earlier entry
                // instead of tripping the no-clobber assertion.
                out_map.putAssumeCapacity(key, value);
            }

            return Value{ .map = out_map };
        } else if (node.cast(Node.List)) |list| {
            var out_list = std.ArrayList(Value).init(arena);
            try out_list.ensureUnusedCapacity(list.values.items.len);

            if (list.values.items.len > 0) {
                // The first scalar element fixes the type used for every
                // scalar element of the list.
                const hint = if (list.values.items[0].cast(Node.Value)) |value| hint: {
                    const start = tree.tokens[value.start.?];
                    const end = tree.tokens[value.end.?];
                    const raw = tree.source[start.start..end.end];
                    _ = std.fmt.parseInt(i64, raw, 10) catch {
                        _ = std.fmt.parseFloat(f64, raw) catch {
                            break :hint ValueType.string;
                        };
                        break :hint ValueType.float;
                    };
                    break :hint ValueType.int;
                } else null;

                for (list.values.items) |elem| {
                    const value = try Value.fromNode(arena, tree, elem, hint);
                    out_list.appendAssumeCapacity(value);
                }
            }

            return Value{ .list = out_list.toOwnedSlice() };
        } else if (node.cast(Node.Value)) |value| {
            const start = tree.tokens[value.start.?];
            const end = tree.tokens[value.end.?];
            const raw = tree.source[start.start..end.end];

            if (type_hint) |hint| {
                return switch (hint) {
                    .int => Value{ .int = try std.fmt.parseInt(i64, raw, 10) },
                    .float => Value{ .float = try std.fmt.parseFloat(f64, raw) },
                    .string => Value{ .string = try arena.dupe(u8, raw) },
                    else => unreachable,
                };
            }

            // No hint: try int, then float, and fall back to string.
            try_int: {
                // TODO infer base for int
                const int = std.fmt.parseInt(i64, raw, 10) catch break :try_int;
                return Value{ .int = int };
            }
            try_float: {
                const float = std.fmt.parseFloat(f64, raw) catch break :try_float;
                return Value{ .float = float };
            }
            return Value{ .string = try arena.dupe(u8, raw) };
        } else {
            log.err("Unexpected node type: {}", .{node.tag});
            return error.UnexpectedNodeType;
        }
    }
};

/// A loaded YAML source: the parse tree, the untyped documents built from it,
/// and the arena that owns all of their memory.
pub const Yaml = struct {
    arena: ArenaAllocator,
    tree: ?Tree = null,
    docs: std.ArrayList(Value),

    /// Frees everything allocated by `load` and by the typed `parse` calls.
    pub fn deinit(self: *Yaml) void {
        self.arena.deinit();
    }

    /// Writes every document to `writer`, one after another.
    pub fn stringify(self: Yaml, writer: anytype) !void {
        for (self.docs.items) |doc| {
            // TODO: emit "--- !directive" before and "..." after a document
            // once documents carry their directive.
            try doc.stringify(writer, .{});
        }
    }

    /// Parses `source` and converts each document into a `Value`.
    /// String data is copied into the arena by `Value.fromNode`.
    /// The caller must call `deinit` on the result. On error, nothing is
    /// leaked: the arena is released before returning.
    pub fn load(allocator: *Allocator, source: []const u8) !Yaml {
        var arena = ArenaAllocator.init(allocator);
        // Release partial allocations when parsing or conversion fails.
        errdefer arena.deinit();

        // NOTE(review): `tree` and `docs` are initialised with
        // `&arena.allocator`, a pointer into this function's local `arena`,
        // which is copied into the returned struct. Presumably they must not
        // allocate after `load` returns; confirm before growing them later.
        var tree = Tree.init(&arena.allocator);
        try tree.parse(source);

        var docs = std.ArrayList(Value).init(&arena.allocator);
        try docs.ensureUnusedCapacity(tree.docs.items.len);

        for (tree.docs.items) |node| {
            const value = try Value.fromNode(&arena.allocator, &tree, node, null);
            docs.appendAssumeCapacity(value);
        }

        return Yaml{
            .arena = arena,
            .tree = tree,
            .docs = docs,
        };
    }

    pub const Error = error{
        Unimplemented,
        TypeMismatch,
        StructFieldMissing,
        ArraySizeMismatch,
        UntaggedUnion,
        UnionTagMissing,
        Overflow,
        OutOfMemory,
    };

    /// Converts the loaded documents into a value of type `T`.
    /// - No documents: only `void` is accepted.
    /// - One document: it is converted to `T` directly.
    /// - Several documents: `T` must be an array whose length equals the
    ///   document count, or a slice; each document becomes one element.
    /// Slices are allocated from the arena and freed by `deinit`.
    pub fn parse(self: *Yaml, comptime T: type) Error!T {
        if (self.docs.items.len == 0) {
            if (@typeInfo(T) == .Void) return {};
            return error.TypeMismatch;
        }

        if (self.docs.items.len == 1) {
            return self.parseValue(T, self.docs.items[0]);
        }

        switch (@typeInfo(T)) {
            .Array => |info| {
                // Guard the indexing below: too many documents would write out
                // of bounds, too few would leave elements undefined.
                if (info.len != self.docs.items.len) return error.ArraySizeMismatch;

                var parsed: T = undefined;
                for (self.docs.items) |doc, i| {
                    parsed[i] = try self.parseValue(info.child, doc);
                }
                return parsed;
            },
            .Pointer => |info| {
                switch (info.size) {
                    .Slice => {
                        var parsed = try self.arena.allocator.alloc(info.child, self.docs.items.len);
                        for (self.docs.items) |doc, i| {
                            parsed[i] = try self.parseValue(info.child, doc);
                        }
                        return parsed;
                    },
                    else => return error.TypeMismatch,
                }
            },
            .Union => return error.Unimplemented,
            else => return error.TypeMismatch,
        }
    }

    /// Converts a single `value` into `T`, dispatching on the kind of `T`.
    fn parseValue(self: *Yaml, comptime T: type, value: Value) Error!T {
        return switch (@typeInfo(T)) {
            .Int => math.cast(T, try value.asInt()),
            .Float => math.lossyCast(T, try value.asFloat()),
            .Struct => self.parseStruct(T, try value.asMap()),
            .Union => self.parseUnion(T, value),
            .Array => self.parseArray(T, try value.asList()),
            .Pointer => {
                if (value.asList()) |list| {
                    return self.parsePointer(T, .{ .list = list });
                } else |_| {
                    return self.parsePointer(T, .{ .string = try value.asString() });
                }
            },
            .Void => error.TypeMismatch,
            // A present value for an optional type (e.g. an element of
            // `[N]?T`) is converted as its child type.
            .Optional => self.parseOptional(T, value),
            else => error.Unimplemented,
        };
    }

    /// Tries each field of the tagged union `T` in declaration order and
    /// returns the first one `value` converts to. Only `TypeMismatch` moves on
    /// to the next field; any other error is returned.
    fn parseUnion(self: *Yaml, comptime T: type, value: Value) Error!T {
        const union_info = @typeInfo(T).Union;

        if (union_info.tag_type) |_| {
            inline for (union_info.fields) |field| {
                if (self.parseValue(field.field_type, value)) |u_value| {
                    return @unionInit(T, field.name, u_value);
                } else |err| {
                    if (@as(@TypeOf(err) || error{TypeMismatch}, err) != error.TypeMismatch) return err;
                }
            }
        } else return error.UntaggedUnion;

        return error.UnionTagMissing;
    }

    /// Converts `value` into the optional type `T`; a missing value yields `null`.
    fn parseOptional(self: *Yaml, comptime T: type, value: ?Value) Error!T {
        const unwrapped = value orelse return null;
        const opt_info = @typeInfo(T).Optional;
        return @as(T, try self.parseValue(opt_info.child, unwrapped));
    }

    /// Fills a struct of type `T` from `map`, one field at a time.
    /// A field is looked up by its name first and, if absent, by its name with
    /// every "_" replaced by "-". Missing optional fields become `null`;
    /// a missing non-optional field is `error.StructFieldMissing`.
    fn parseStruct(self: *Yaml, comptime T: type, map: Map) Error!T {
        const struct_info = @typeInfo(T).Struct;
        var parsed: T = undefined;

        inline for (struct_info.fields) |field| {
            const value: ?Value = map.get(field.name) orelse blk: {
                const field_name = try mem.replaceOwned(u8, &self.arena.allocator, field.name, "_", "-");
                break :blk map.get(field_name);
            };

            if (@typeInfo(field.field_type) == .Optional) {
                @field(parsed, field.name) = try self.parseOptional(field.field_type, value);
                continue;
            }

            const unwrapped = value orelse {
                log.err("missing struct field: {s}: {s}", .{ field.name, @typeName(field.field_type) });
                return error.StructFieldMissing;
            };
            @field(parsed, field.name) = try self.parseValue(field.field_type, unwrapped);
        }

        return parsed;
    }

    /// Converts `value` into the pointer type `T`. Only slices are supported:
    /// a slice of 8-bit integers takes the string payload as-is, any other
    /// slice is allocated from the arena and filled from the list payload.
    /// Returns `error.TypeMismatch` when `value` holds the wrong variant.
    fn parsePointer(self: *Yaml, comptime T: type, value: Value) Error!T {
        const ptr_info = @typeInfo(T).Pointer;
        const arena = &self.arena.allocator;

        switch (ptr_info.size) {
            .Slice => {
                const child_info = @typeInfo(ptr_info.child);
                if (child_info == .Int and child_info.Int.bits == 8) {
                    return value.asString();
                }

                // `value` may be a `.string` here (see `parseValue`), so go
                // through the checked accessor rather than reading `.list`.
                const list = try value.asList();
                var parsed = try arena.alloc(ptr_info.child, list.len);
                for (list) |elem, i| {
                    parsed[i] = try self.parseValue(ptr_info.child, elem);
                }
                return parsed;
            },
            else => return error.Unimplemented,
        }
    }

    /// Converts `list` into the array type `T`; the lengths must match exactly.
    fn parseArray(self: *Yaml, comptime T: type, list: List) Error!T {
        const array_info = @typeInfo(T).Array;
        if (array_info.len != list.len) return error.ArraySizeMismatch;

        var parsed: T = undefined;
        for (list) |elem, i| {
            parsed[i] = try self.parseValue(array_info.child, elem);
        }

        return parsed;
    }
};

test {
    testing.refAllDecls(@This());
}

test "simple list" {
    const source =
        \\- a
        \\- b
        \\- c
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    try testing.expectEqual(yaml.docs.items.len, 1);

    const list = yaml.docs.items[0].list;
    try testing.expectEqual(list.len, 3);

    try testing.expect(mem.eql(u8, list[0].string, "a"));
    try testing.expect(mem.eql(u8, list[1].string, "b"));
    try testing.expect(mem.eql(u8, list[2].string, "c"));
}

test "simple list typed as array of strings" {
    const source =
        \\- a
        \\- b
        \\- c
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    try testing.expectEqual(yaml.docs.items.len, 1);

    const arr = try yaml.parse([3][]const u8);
    try testing.expectEqual(arr.len, 3);
    try testing.expect(mem.eql(u8, arr[0], "a"));
    try testing.expect(mem.eql(u8, arr[1], "b"));
    try testing.expect(mem.eql(u8, arr[2], "c"));
}

test "simple list typed as array of ints" {
    const source =
        \\- 0
        \\- 1
        \\- 2
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    try testing.expectEqual(yaml.docs.items.len, 1);

    const arr = try yaml.parse([3]u8);
    try testing.expectEqual(arr.len, 3);
    try testing.expectEqual(arr[0], 0);
    try testing.expectEqual(arr[1], 1);
    try testing.expectEqual(arr[2], 2);
}

test "list typed as array of optionals" {
    const source =
        \\- 1
        \\- 2
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    const arr = try yaml.parse([2]?u8);
    try testing.expectEqual(arr[0].?, 1);
    try testing.expectEqual(arr[1].?, 2);
}

test "string typed as slice of ints is a type mismatch" {
    const source =
        \\a: hello
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: []const u32 }));
}

test "multidoc typed as array of wrong length is an error" {
    const source =
        \\---
        \\a: 0
        \\---
        \\a: 1
        \\...
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([1]struct { a: usize }));
    try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([3]struct { a: usize }));
}

test "list of mixed sign integer" {
    const source =
        \\- 0
        \\- -1
        \\- 2
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    try testing.expectEqual(yaml.docs.items.len, 1);

    const arr = try yaml.parse([3]i8);
    try testing.expectEqual(arr.len, 3);
try testing.expectEqual(arr[0], 0); + try testing.expectEqual(arr[1], -1); + try testing.expectEqual(arr[2], 2); +} + +test "simple map untyped" { + const source = + \\a: 0 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expectEqual(map.get("a").?.int, 0); +} + +test "simple map typed" { + const source = + \\a: 0 + \\b: hello there + \\c: 'wait, what?' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { a: usize, b: []const u8, c: []const u8 }); + try testing.expectEqual(simple.a, 0); + try testing.expect(mem.eql(u8, simple.b, "hello there")); + try testing.expect(mem.eql(u8, simple.c, "wait, what?")); +} + +test "typed nested structs" { + const source = + \\a: + \\ b: hello there + \\ c: 'wait, what?' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { + a: struct { + b: []const u8, + c: []const u8, + }, + }); + try testing.expect(mem.eql(u8, simple.a.b, "hello there")); + try testing.expect(mem.eql(u8, simple.a.c, "wait, what?")); +} + +test "multidoc typed as a slice of structs" { + const source = + \\--- + \\a: 0 + \\--- + \\a: 1 + \\... + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + { + const result = try yaml.parse([2]struct { a: usize }); + try testing.expectEqual(result.len, 2); + try testing.expectEqual(result[0].a, 0); + try testing.expectEqual(result[1].a, 1); + } + + { + const result = try yaml.parse([]struct { a: usize }); + try testing.expectEqual(result.len, 2); + try testing.expectEqual(result[0].a, 0); + try testing.expectEqual(result[1].a, 1); + } +} + +test "multidoc typed as a struct is an error" { + const source = + \\--- + \\a: 0 + \\--- + \\b: 1 + \\... 
+ ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize })); + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { b: usize })); + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize, b: usize })); +} + +test "multidoc typed as a slice of structs with optionals" { + const source = + \\--- + \\a: 0 + \\c: 1.0 + \\--- + \\a: 1 + \\b: different field + \\... + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const result = try yaml.parse([]struct { a: usize, b: ?[]const u8, c: ?f16 }); + try testing.expectEqual(result.len, 2); + + try testing.expectEqual(result[0].a, 0); + try testing.expect(result[0].b == null); + try testing.expect(result[0].c != null); + try testing.expectEqual(result[0].c.?, 1.0); + + try testing.expectEqual(result[1].a, 1); + try testing.expect(result[1].b != null); + try testing.expect(mem.eql(u8, result[1].b.?, "different field")); + try testing.expect(result[1].c == null); +} + +test "empty yaml can be represented as void" { + const source = ""; + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + const result = try yaml.parse(void); + try testing.expect(@TypeOf(result) == void); +} + +test "nonempty yaml cannot be represented as void" { + const source = + \\a: b + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(void)); +} + +test "typed array size mismatch" { + const source = + \\- 0 + \\- 0 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([1]usize)); + try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([5]usize)); +}