stage2: start reworking Module/astgen for memory layout changes

This commit does not reach any particular milestone; it is
work-in-progress towards getting things to build.

There's a `@panic("TODO")` in translate-c that should be removed when
working on translate-c stuff.
This commit is contained in:
Andrew Kelley 2021-02-11 23:29:55 -07:00
parent 288e180598
commit 3d0f4b9030
9 changed files with 1177 additions and 742 deletions

View File

@ -12,7 +12,6 @@ pub const fmtId = @import("zig/fmt.zig").fmtId;
pub const fmtEscapes = @import("zig/fmt.zig").fmtEscapes;
pub const parse = @import("zig/parse.zig").parse;
pub const parseStringLiteral = @import("zig/string_literal.zig").parse;
pub const render = @import("zig/render.zig").render;
pub const ast = @import("zig/ast.zig");
pub const system = @import("zig/system.zig");
pub const CrossTarget = @import("zig/cross_target.zig").CrossTarget;

View File

@ -45,6 +45,28 @@ pub const Tree = struct {
tree.* = undefined;
}
pub const RenderError = error{
/// Ran out of memory allocating call stack frames to complete rendering, or
/// ran out of memory allocating space in the output buffer.
OutOfMemory,
};
/// `gpa` is used for allocating the resulting formatted source code, as well as
/// for allocating extra stack memory if needed, because this function utilizes recursion.
/// Note: that's not actually true yet, see https://github.com/ziglang/zig/issues/1006.
/// Caller owns the returned slice of bytes, allocated with `gpa`.
/// Render the tree as canonically formatted Zig source code.
/// Caller owns the returned slice of bytes, allocated with `gpa`.
pub fn render(tree: Tree, gpa: *mem.Allocator) RenderError![]u8 {
    var out = std.ArrayList(u8).init(gpa);
    // On success `toOwnedSlice` empties the list, making this deinit a
    // no-op; on error it releases the partially written buffer.
    defer out.deinit();
    try tree.renderToArrayList(&out);
    return out.toOwnedSlice();
}
/// Append the formatted rendering of `tree` to `buffer`.
pub fn renderToArrayList(tree: Tree, buffer: *std.ArrayList(u8)) RenderError!void {
    const renderer = @import("./render.zig");
    return renderer.renderTree(buffer, tree);
}
pub fn tokenLocation(self: Tree, start_offset: ByteOffset, token_index: TokenIndex) Location {
var loc = Location{
.line = 0,
@ -72,6 +94,27 @@ pub const Tree = struct {
return loc;
}
/// Return the source bytes that make up the token at `token_index`.
pub fn tokenSlice(tree: Tree, token_index: TokenIndex) []const u8 {
    const starts = tree.tokens.items(.start);
    const tags = tree.tokens.items(.tag);
    const tag = tags[token_index];

    // Tokens with a fixed spelling (keywords, operators) need no
    // source scan; their lexeme is determined entirely by the tag.
    if (tag.lexeme()) |fixed| return fixed;

    // For the rest (identifiers, literals, ...) re-tokenize from the
    // recorded start offset to discover where the token ends.
    var tokenizer = std.zig.Tokenizer{
        .buffer = tree.source,
        .index = starts[token_index],
        .pending_invalid_token = null,
    };
    const tok = tokenizer.next();
    assert(tok.tag == tag);
    return tree.source[tok.loc.start..tok.loc.end];
}
pub fn extraData(tree: Tree, index: usize, comptime T: type) T {
const fields = std.meta.fields(T);
var result: T = undefined;
@ -82,6 +125,12 @@ pub const Tree = struct {
return result;
}
/// Return the slice of top-level declaration nodes of the file.
pub fn rootDecls(tree: Tree) []const Node.Index {
    // The root node always lives at index 0; its lhs/rhs delimit the
    // range of top-level declarations inside `extra_data`.
    const root_data = tree.nodes.items(.data)[0];
    return tree.extra_data[root_data.lhs..root_data.rhs];
}
pub fn renderError(tree: Tree, parse_error: Error, stream: anytype) !void {
const tokens = tree.tokens.items(.tag);
switch (parse_error) {
@ -966,6 +1015,15 @@ pub const Tree = struct {
return mem.indexOfScalar(u8, source, '\n') == null;
}
/// Return the source bytes spanned by `node`: from the start of its
/// first token through the end of its last token.
pub fn getNodeSource(tree: Tree, node: Node.Index) []const u8 {
    const token_starts = tree.tokens.items(.start);
    const first_token = tree.firstToken(node);
    const last_token = tree.lastToken(node);
    const start = token_starts[first_token];
    // Bug fix: the previous code returned `tree.source[start..][0..len]`
    // with `len` being the LAST token's length only, so any node wider
    // than one token was truncated to the size of its final token. The
    // end of the span is the last token's start plus that token's length.
    const end = token_starts[last_token] + tree.tokenSlice(last_token).len;
    return tree.source[start..end];
}
pub fn globalVarDecl(tree: Tree, node: Node.Index) full.VarDecl {
assert(tree.nodes.items(.tag)[node] == .global_var_decl);
const data = tree.nodes.items(.data)[node];
@ -1653,7 +1711,31 @@ pub const Tree = struct {
const token_tags = tree.tokens.items(.tag);
var result: full.FnProto = .{
.ast = info,
.visib_token = null,
.extern_export_token = null,
.lib_name = null,
.name_token = null,
.lparen = undefined,
};
var i = info.fn_token;
while (i > 0) {
i -= 1;
switch (token_tags[i]) {
.keyword_extern, .keyword_export => result.extern_export_token = i,
.keyword_pub => result.visib_token = i,
.string_literal => result.lib_name = i,
else => break,
}
}
const after_fn_token = info.fn_token + 1;
if (token_tags[after_fn_token] == .identifier) {
result.name_token = after_fn_token;
result.lparen = after_fn_token + 1;
} else {
result.lparen = after_fn_token;
}
assert(token_tags[result.lparen] == .l_paren);
return result;
}
@ -1924,6 +2006,11 @@ pub const full = struct {
};
pub const FnProto = struct {
visib_token: ?TokenIndex,
extern_export_token: ?TokenIndex,
lib_name: ?TokenIndex,
name_token: ?TokenIndex,
lparen: TokenIndex,
ast: Ast,
pub const Ast = struct {
@ -1934,6 +2021,114 @@ pub const full = struct {
section_expr: Node.Index,
callconv_expr: Node.Index,
};
/// One parameter of a function prototype, as produced by `Iterator.next`.
pub const Param = struct {
    // First token of the doc-comment run preceding the parameter, if any.
    first_doc_comment: ?TokenIndex,
    // Identifier token of the parameter name, if the parameter is named.
    name_token: ?TokenIndex,
    // `comptime` or `noalias` keyword token, if present.
    comptime_noalias: ?TokenIndex,
    // Token index of `anytype` or `...` when the parameter has no type
    // expression node; null for ordinary typed parameters.
    anytype_ellipsis3: ?TokenIndex,
    // Type expression node; 0 when the parameter is `anytype` or `...`.
    type_expr: Node.Index,
};
/// Abstracts over the fact that anytype and ... are not included
/// in the params slice, since they are simple identifiers and
/// not sub-expressions.
pub const Iterator = struct {
    tree: *const Tree,
    fn_proto: *const FnProto,
    // Index into `fn_proto.ast.params` of the next typed parameter.
    param_i: usize,
    // Token cursor used to scan for `anytype`/`...` params between
    // typed params; only valid once `tok_flag` is true.
    tok_i: TokenIndex,
    // False: next() should emit the next typed param from the params
    // slice. True: next() should scan tokens after the previous param.
    tok_flag: bool,

    /// Return the next parameter, or null when all params are consumed.
    pub fn next(it: *Iterator) ?Param {
        const token_tags = it.tree.tokens.items(.tag);
        while (true) {
            var first_doc_comment: ?TokenIndex = null;
            var comptime_noalias: ?TokenIndex = null;
            var name_token: ?TokenIndex = null;
            if (!it.tok_flag) {
                if (it.param_i >= it.fn_proto.ast.params.len) {
                    return null;
                }
                const param_type = it.fn_proto.ast.params[it.param_i];
                // Bug fix: `tree` is not in scope inside this method; the
                // Tree must be reached through `it.tree` (here and below).
                var tok_i = it.tree.firstToken(param_type) - 1;
                // Walk backwards over `name:`, doc comments, and
                // comptime/noalias qualifiers preceding the type expr.
                while (true) : (tok_i -= 1) switch (token_tags[tok_i]) {
                    .colon => continue,
                    .identifier => name_token = tok_i,
                    .doc_comment => first_doc_comment = tok_i,
                    .keyword_comptime, .keyword_noalias => comptime_noalias = tok_i,
                    else => break,
                };
                it.param_i += 1;
                it.tok_i = it.tree.lastToken(param_type) + 1;
                it.tok_flag = true;
                return Param{
                    .first_doc_comment = first_doc_comment,
                    .comptime_noalias = comptime_noalias,
                    .name_token = name_token,
                    .anytype_ellipsis3 = null,
                    .type_expr = param_type,
                };
            }
            // Look for anytype and ... params afterwards.
            if (token_tags[it.tok_i] == .comma) {
                it.tok_i += 1;
            } else {
                return null;
            }
            if (token_tags[it.tok_i] == .doc_comment) {
                first_doc_comment = it.tok_i;
                while (token_tags[it.tok_i] == .doc_comment) {
                    it.tok_i += 1;
                }
            }
            switch (token_tags[it.tok_i]) {
                .ellipsis3 => {
                    it.tok_flag = false; // Next iteration should return null.
                    return Param{
                        .first_doc_comment = first_doc_comment,
                        .comptime_noalias = null,
                        .name_token = null,
                        .anytype_ellipsis3 = it.tok_i,
                        .type_expr = 0,
                    };
                },
                .keyword_noalias, .keyword_comptime => {
                    comptime_noalias = it.tok_i;
                    it.tok_i += 1;
                },
                else => {},
            }
            if (token_tags[it.tok_i] == .identifier and
                token_tags[it.tok_i + 1] == .colon)
            {
                name_token = it.tok_i;
                it.tok_i += 2;
            }
            if (token_tags[it.tok_i] == .keyword_anytype) {
                it.tok_i += 1;
                return Param{
                    .first_doc_comment = first_doc_comment,
                    .comptime_noalias = comptime_noalias,
                    .name_token = name_token,
                    .anytype_ellipsis3 = it.tok_i - 1,
                    // Bug fix: `param_type` is only in scope in the typed-
                    // param branch above; an `anytype` parameter has no
                    // type expression node, so use 0 (consistent with the
                    // `.ellipsis3` case).
                    .type_expr = 0,
                };
            }
            it.tok_flag = false;
        }
    }
};
/// Return an iterator over this prototype's parameters.
/// NOTE(review): `&tree` and `&fn_proto` take the addresses of by-value
/// parameters, whose storage does not outlive this call — the pointers
/// stored in the returned Iterator look dangling as soon as `iterate`
/// returns. Confirm, and consider taking `*const FnProto` / `*const Tree`
/// parameters (or caller-owned pointers) instead.
pub fn iterate(fn_proto: FnProto, tree: Tree) Iterator {
    return .{
        .tree = &tree,
        .fn_proto = &fn_proto,
        .param_i = 0,
        // tok_i is only read once tok_flag becomes true, so it may start
        // undefined.
        .tok_i = undefined,
        .tok_flag = false,
    };
}
};
pub const StructInit = struct {

View File

@ -4223,7 +4223,7 @@ fn testParse(source: []const u8, allocator: *mem.Allocator, anything_changed: *b
return error.ParseError;
}
const formatted = try std.zig.render(allocator, tree);
const formatted = try tree.render(allocator);
anything_changed.* = !mem.eql(u8, formatted, source);
return formatted;
}

View File

@ -13,28 +13,24 @@ const Token = std.zig.Token;
const indent_delta = 4;
const asm_indent_delta = 2;
pub const Error = error{
/// Ran out of memory allocating call stack frames to complete rendering, or
/// ran out of memory allocating space in the output buffer.
OutOfMemory,
};
pub const Error = ast.Tree.RenderError;
const Writer = std.ArrayList(u8).Writer;
const Ais = std.io.AutoIndentingStream(Writer);
/// `gpa` is used for allocating the resulting formatted source code, as well as
/// for allocating extra stack memory if needed, because this function utilizes recursion.
/// Note: that's not actually true yet, see https://github.com/ziglang/zig/issues/1006.
/// Caller owns the returned slice of bytes, allocated with `gpa`.
pub fn render(gpa: *mem.Allocator, tree: ast.Tree) Error![]u8 {
pub fn renderTree(buffer: *std.ArrayList(u8), tree: ast.Tree) Error!void {
assert(tree.errors.len == 0); // Cannot render an invalid tree.
var buffer = std.ArrayList(u8).init(gpa);
defer buffer.deinit();
var auto_indenting_stream = std.io.autoIndentingStream(indent_delta, buffer.writer());
try renderRoot(&auto_indenting_stream, tree);
return buffer.toOwnedSlice();
const ais = &auto_indenting_stream;
// Render all the line comments at the beginning of the file.
const src_start: usize = if (mem.startsWith(u8, tree.source, "\xEF\xBB\xBF")) 3 else 0;
const comment_end_loc: usize = tree.tokens.items(.start)[0];
_ = try renderCommentsAndNewlines(ais, tree, src_start, comment_end_loc);
for (tree.rootDecls()) |decl| {
try renderMember(ais, tree, decl, .newline);
}
}
/// Assumes that start is the first byte past the previous token and
@ -75,21 +71,6 @@ fn renderCommentsAndNewlines(ais: *Ais, tree: ast.Tree, start: usize, end: usize
return index != start;
}
fn renderRoot(ais: *Ais, tree: ast.Tree) Error!void {
// Render all the line comments at the beginning of the file.
const src_start: usize = if (mem.startsWith(u8, tree.source, "\xEF\xBB\xBF")) 3 else 0;
const comment_end_loc: usize = tree.tokens.items(.start)[0];
_ = try renderCommentsAndNewlines(ais, tree, src_start, comment_end_loc);
// Root is always index 0.
const nodes_data = tree.nodes.items(.data);
const root_decls = tree.extra_data[nodes_data[0].lhs..nodes_data[0].rhs];
for (root_decls) |decl| {
try renderMember(ais, tree, decl, .newline);
}
}
fn renderMember(ais: *Ais, tree: ast.Tree, decl: ast.Node.Index, space: Space) Error!void {
const token_tags = tree.tokens.items(.tag);
const main_tokens = tree.nodes.items(.main_token);
@ -1944,17 +1925,7 @@ fn renderToken(ais: *Ais, tree: ast.Tree, token_index: ast.TokenIndex, space: Sp
const token_starts = tree.tokens.items(.start);
const token_start = token_starts[token_index];
const token_tag = token_tags[token_index];
const lexeme = token_tag.lexeme() orelse lexeme: {
var tokenizer: std.zig.Tokenizer = .{
.buffer = tree.source,
.index = token_start,
.pending_invalid_token = null,
};
const token = tokenizer.next();
assert(token.tag == token_tag);
break :lexeme tree.source[token.loc.start..token.loc.end];
};
const lexeme = tree.tokenSlice(token_index);
try ais.writer().writeAll(lexeme);
switch (space) {

View File

@ -921,7 +921,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
// TODO this is duped so it can be freed in Container.deinit
.sub_file_path = try gpa.dupe(u8, root_pkg.root_src_path),
.source = .{ .unloaded = {} },
.contents = .{ .not_available = {} },
.tree = undefined,
.status = .never_loaded,
.pkg = root_pkg,
.root_container = .{
@ -1882,7 +1882,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
const c_headers_dir_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{"include"});
const c_headers_dir_path_z = try arena.dupeZ(u8, c_headers_dir_path);
var clang_errors: []translate_c.ClangErrMsg = &[0]translate_c.ClangErrMsg{};
const tree = translate_c.translate(
var tree = translate_c.translate(
comp.gpa,
new_argv.ptr,
new_argv.ptr + new_argv.len,
@ -1901,7 +1901,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
};
},
};
defer tree.deinit();
defer tree.deinit(comp.gpa);
if (comp.verbose_cimport) {
log.info("C import .d file: {s}", .{out_dep_path});
@ -1919,9 +1919,10 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
var out_zig_file = try o_dir.createFile(cimport_zig_basename, .{});
defer out_zig_file.close();
var bos = std.io.bufferedWriter(out_zig_file.writer());
_ = try std.zig.render(comp.gpa, bos.writer(), tree);
try bos.flush();
const formatted = try tree.render(comp.gpa);
defer comp.gpa.free(formatted);
try out_zig_file.writeAll(formatted);
man.writeManifest() catch |err| {
log.warn("failed to write cache manifest for C import: {s}", .{@errorName(err)});

File diff suppressed because it is too large Load Diff

View File

@ -2153,7 +2153,7 @@ fn cmdTranslateC(comp: *Compilation, arena: *Allocator, enable_cache: bool) !voi
const c_headers_dir_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{"include"});
const c_headers_dir_path_z = try arena.dupeZ(u8, c_headers_dir_path);
var clang_errors: []translate_c.ClangErrMsg = &[0]translate_c.ClangErrMsg{};
const tree = translate_c.translate(
var tree = translate_c.translate(
comp.gpa,
new_argv.ptr,
new_argv.ptr + new_argv.len,
@ -2174,7 +2174,7 @@ fn cmdTranslateC(comp: *Compilation, arena: *Allocator, enable_cache: bool) !voi
process.exit(1);
},
};
defer tree.deinit();
defer tree.deinit(comp.gpa);
if (out_dep_path) |dep_file_path| {
const dep_basename = std.fs.path.basename(dep_file_path);
@ -2188,16 +2188,21 @@ fn cmdTranslateC(comp: *Compilation, arena: *Allocator, enable_cache: bool) !voi
const digest = man.final();
const o_sub_path = try fs.path.join(arena, &[_][]const u8{ "o", &digest });
var o_dir = try comp.local_cache_directory.handle.makeOpenPath(o_sub_path, .{});
defer o_dir.close();
var zig_file = try o_dir.createFile(translated_zig_basename, .{});
defer zig_file.close();
var bw = io.bufferedWriter(zig_file.writer());
_ = try std.zig.render(comp.gpa, bw.writer(), tree);
try bw.flush();
const formatted = try tree.render(comp.gpa);
defer comp.gpa.free(formatted);
man.writeManifest() catch |err| warn("failed to write cache manifest: {s}", .{@errorName(err)});
try zig_file.writeAll(formatted);
man.writeManifest() catch |err| warn("failed to write cache manifest: {s}", .{
@errorName(err),
});
break :digest digest;
};
@ -2684,10 +2689,10 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
const source_code = try stdin.readAllAlloc(gpa, max_src_size);
defer gpa.free(source_code);
const tree = std.zig.parse(gpa, source_code) catch |err| {
var tree = std.zig.parse(gpa, source_code) catch |err| {
fatal("error parsing stdin: {s}", .{err});
};
defer tree.deinit();
defer tree.deinit(gpa);
for (tree.errors) |parse_error| {
try printErrMsgToFile(gpa, parse_error, tree, "<stdin>", stderr_file, color);
@ -2695,16 +2700,15 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
if (tree.errors.len != 0) {
process.exit(1);
}
const formatted = try tree.render(gpa);
defer gpa.free(formatted);
if (check_flag) {
const anything_changed = try std.zig.render(gpa, io.null_writer, tree);
const code = if (anything_changed) @as(u8, 1) else @as(u8, 0);
const code: u8 = @boolToInt(mem.eql(u8, formatted, source_code));
process.exit(code);
}
var bw = io.bufferedWriter(io.getStdOut().writer());
_ = try std.zig.render(gpa, bw.writer(), tree);
try bw.flush();
return;
return io.getStdOut().writeAll(formatted);
}
if (input_files.items.len == 0) {
@ -2841,8 +2845,8 @@ fn fmtPathFile(
// Add to set after no longer possible to get error.IsDir.
if (try fmt.seen.fetchPut(stat.inode, {})) |_| return;
const tree = try std.zig.parse(fmt.gpa, source_code);
defer tree.deinit();
var tree = try std.zig.parse(fmt.gpa, source_code);
defer tree.deinit(fmt.gpa);
for (tree.errors) |parse_error| {
try printErrMsgToFile(fmt.gpa, parse_error, tree, file_path, std.io.getStdErr(), fmt.color);
@ -2852,22 +2856,20 @@ fn fmtPathFile(
return;
}
if (check_mode) {
const anything_changed = try std.zig.render(fmt.gpa, io.null_writer, tree);
if (anything_changed) {
const stdout = io.getStdOut().writer();
try stdout.print("{s}\n", .{file_path});
fmt.any_error = true;
}
} else {
// As a heuristic, we make enough capacity for the same as the input source.
try fmt.out_buffer.ensureCapacity(source_code.len);
fmt.out_buffer.items.len = 0;
const writer = fmt.out_buffer.writer();
const anything_changed = try std.zig.render(fmt.gpa, writer, tree);
if (!anything_changed)
return; // Good thing we didn't waste any file system access on this.
// As a heuristic, we make enough capacity for the same as the input source.
fmt.out_buffer.shrinkRetainingCapacity(0);
try fmt.out_buffer.ensureCapacity(source_code.len);
try tree.renderToArrayList(&fmt.out_buffer);
const anything_changed = mem.eql(u8, fmt.out_buffer.items, source_code);
if (!anything_changed)
return;
if (check_mode) {
const stdout = io.getStdOut().writer();
try stdout.print("{s}\n", .{file_path});
fmt.any_error = true;
} else {
var af = try dir.atomicFile(sub_path, .{ .mode = stat.mode });
defer af.deinit();
@ -2881,7 +2883,7 @@ fn fmtPathFile(
fn printErrMsgToFile(
gpa: *mem.Allocator,
parse_error: ast.Error,
tree: *ast.Tree,
tree: ast.Tree,
path: []const u8,
file: fs.File,
color: Color,
@ -2892,18 +2894,16 @@ fn printErrMsgToFile(
.off => false,
};
const lok_token = parse_error.loc();
const span_first = lok_token;
const span_last = lok_token;
const first_token = tree.token_locs[span_first];
const last_token = tree.token_locs[span_last];
const start_loc = tree.tokenLocationLoc(0, first_token);
const end_loc = tree.tokenLocationLoc(first_token.end, last_token);
const token_starts = tree.tokens.items(.start);
const token_tags = tree.tokens.items(.tag);
const first_token_start = token_starts[lok_token];
const start_loc = tree.tokenLocation(0, lok_token);
var text_buf = std.ArrayList(u8).init(gpa);
defer text_buf.deinit();
const writer = text_buf.writer();
try parse_error.render(tree.token_ids, writer);
try tree.renderError(parse_error, writer);
const text = text_buf.items;
const stream = file.writer();
@ -2920,8 +2920,12 @@ fn printErrMsgToFile(
}
try stream.writeByte('\n');
try stream.writeByteNTimes(' ', start_loc.column);
try stream.writeByteNTimes('~', last_token.end - first_token.start);
try stream.writeByte('\n');
if (token_tags[lok_token].lexeme()) |lexeme| {
try stream.writeByteNTimes('~', lexeme.len);
try stream.writeByte('\n');
} else {
try stream.writeAll("^\n");
}
}
pub const info_zen =

View File

@ -375,7 +375,7 @@ pub fn translate(
args_end: [*]?[*]const u8,
errors: *[]ClangErrMsg,
resources_path: [*:0]const u8,
) !*ast.Tree {
) !ast.Tree {
const ast_unit = clang.LoadFromCommandLine(
args_begin,
args_end,
@ -396,6 +396,14 @@ pub fn translate(
var arena = std.heap.ArenaAllocator.init(gpa);
errdefer arena.deinit();
if (true) {
var x = false;
if (x) {
return error.OutOfMemory;
}
@panic("TODO update translate-c");
}
var context = Context{
.gpa = gpa,
.arena = &arena.allocator,

View File

@ -357,6 +357,7 @@ pub const Inst = struct {
.ret_type,
.unreach_nocheck,
.@"unreachable",
.arg,
=> NoOp,
.alloc,
@ -449,7 +450,6 @@ pub const Inst = struct {
.block_comptime_flat,
=> Block,
.arg => Arg,
.array_type_sentinel => ArrayTypeSentinel,
.@"break" => Break,
.breakvoid => BreakVoid,
@ -685,16 +685,6 @@ pub const Inst = struct {
kw_args: struct {},
};
pub const Arg = struct {
pub const base_tag = Tag.arg;
base: Inst,
positionals: struct {
name: []const u8,
},
kw_args: struct {},
};
pub const Block = struct {
pub const base_tag = Tag.block;
base: Inst,
@ -1608,6 +1598,7 @@ const DumpTzir = struct {
.unreach,
.breakpoint,
.dbg_stmt,
.arg,
=> {},
.ref,
@ -1652,8 +1643,6 @@ const DumpTzir = struct {
try dtz.findConst(bin_op.rhs);
},
.arg => {},
.br => {
const br = inst.castTag(.br).?;
try dtz.findConst(&br.block.base);