From cc1c2bd5684a6195e9535fff38ca54ffb70ebe5a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 20 Apr 2020 19:19:35 -0400 Subject: [PATCH] simplify ZIR spec; separate parsing/rendering from analysis --- lib/std/fs.zig | 12 +- lib/std/mem.zig | 31 +- src-self-hosted/ir.zig | 716 ++++-------------------------------- src-self-hosted/ir/text.zig | 712 +++++++++++++++++++++++++++++++++++ src-self-hosted/type.zig | 66 +++- test/stage2/ir.zig | 51 ++- 6 files changed, 906 insertions(+), 682 deletions(-) create mode 100644 src-self-hosted/ir/text.zig diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 4a13d3e3e4..ef6f75601f 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -1012,25 +1012,27 @@ pub const Dir = struct { /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. pub fn readFileAlloc(self: Dir, allocator: *mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 { - return self.readFileAllocAligned(allocator, file_path, max_bytes, @alignOf(u8)); + return self.readFileAllocOptions(allocator, file_path, max_bytes, @alignOf(u8), null); } /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. - pub fn readFileAllocAligned( + /// Allows specifying alignment and a sentinel value. + pub fn readFileAllocOptions( self: Dir, allocator: *mem.Allocator, file_path: []const u8, max_bytes: usize, - comptime A: u29, - ) ![]align(A) u8 { + comptime alignment: u29, + comptime optional_sentinel: ?u8, + ) !(if (optional_sentinel) |s| [:s]align(alignment) u8 else []align(alignment) u8) { var file = try self.openFile(file_path, .{}); defer file.close(); const size = math.cast(usize, try file.getEndPos()) catch math.maxInt(usize); if (size > max_bytes) return error.FileTooBig; - const buf = try allocator.alignedAlloc(u8, A, size); + const buf = try allocator.allocWithOptions(u8, size, alignment, optional_sentinel); errdefer allocator.free(buf); try file.inStream().readNoEof(buf); diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 282ca61fc8..584ba116d8 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -105,6 +105,31 @@ pub const Allocator = struct { return self.alignedAlloc(T, null, n); } + pub fn allocWithOptions( + self: *Allocator, + comptime Elem: type, + n: usize, + /// null means naturally aligned + comptime optional_alignment: ?u29, + comptime optional_sentinel: ?Elem, + ) Error!AllocWithOptionsPayload(Elem, optional_alignment, optional_sentinel) { + if (optional_sentinel) |sentinel| { + const ptr = try self.alignedAlloc(Elem, optional_alignment, n + 1); + ptr[n] = sentinel; + return ptr[0..n :sentinel]; + } else { + return alignedAlloc(Elem, optional_alignment, n); + } + } + + fn AllocWithOptionsPayload(comptime Elem: type, comptime alignment: ?u29, comptime sentinel: ?Elem) type { + if (sentinel) |s| { + return [:s]align(alignment orelse @alignOf(T)) Elem; + } else { + return []align(alignment orelse @alignOf(T)) Elem; + } + } + /// Allocates an array of `n + 1` items of type `T` and sets the first `n` /// items to `undefined` and the last item to `sentinel`. Depending on the /// Allocator implementation, it may be required to call `free` once the @@ -113,10 +138,10 @@ pub const Allocator = struct { /// call `free` when done. /// /// For allocating a single item, see `create`. + /// + /// Deprecated; use `allocWithOptions`. pub fn allocSentinel(self: *Allocator, comptime Elem: type, n: usize, comptime sentinel: Elem) Error![:sentinel]Elem { - var ptr = try self.alloc(Elem, n + 1); - ptr[n] = sentinel; - return ptr[0..n :sentinel]; + return self.allocWithOptions(Elem, n, null, sentinel); } pub fn alignedAlloc( diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index eaa1aba26d..f87e06eb43 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -4,41 +4,26 @@ const Allocator = std.mem.Allocator; const Value = @import("value.zig").Value; const Type = @import("type.zig").Type; const assert = std.debug.assert; +const text = @import("ir/text.zig"); +/// These are in-memory, analyzed instructions. See `text.Inst` for the representation +/// of instructions that correspond to the ZIR text format. pub const Inst = struct { - tag: Tag, + pub fn ty(base: *Inst) ?Type { + switch (base.tag) { + .constant => return base.cast(Constant).?.ty, + .@"asm" => return base.cast(Assembly).?.ty, + .@"fn" => return base.cast(Fn).?.ty, - /// These names are used for the IR text format. - pub const Tag = enum { - constant, - ptrtoint, - fieldptr, - deref, - @"asm", - @"unreachable", - @"fn", - @"export", - }; + .ptrtoint => return Type.initTag(.@"usize"), + .@"unreachable" => return Type.initTag(.@"noreturn"), + .@"export" => return Type.initTag(.@"void"), + .fntype, .primitive => return Type.initTag(.@"type"), - pub fn TagToType(tag: Tag) type { - return switch (tag) { - .constant => Constant, - .ptrtoint => PtrToInt, - .fieldptr => FieldPtr, - .deref => Deref, - .@"asm" => Assembly, - .@"unreachable" => Unreachable, - .@"fn" => Fn, - .@"export" => Export, - }; - } - - pub fn cast(base: *Inst, comptime T: type) ?*T { - const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag; - if (base.tag != expected_tag) - return null; - - return @fieldParentPtr(T, "base", base); + .fieldptr, + .deref, + => return null, + } } /// This struct owns the `Value` memory. When the struct is deallocated, @@ -53,644 +38,70 @@ pub const Inst = struct { }, kw_args: struct {}, }; - - pub const PtrToInt = struct { - base: Inst = Inst{ .tag = .ptrtoint }, - - positionals: struct { - ptr: *Inst, - }, - kw_args: struct {}, - }; - - pub const FieldPtr = struct { - base: Inst = Inst{ .tag = .fieldptr }, - - positionals: struct { - object_ptr: *Inst, - field_name: *Inst, - }, - kw_args: struct {}, - }; - - pub const Deref = struct { - base: Inst = Inst{ .tag = .deref }, - - positionals: struct { - ptr: *Inst, - }, - kw_args: struct {}, - }; - - pub const Assembly = struct { - base: Inst = Inst{ .tag = .@"asm" }, - - positionals: struct { - asm_source: *Inst, - }, - kw_args: struct { - @"volatile": bool = false, - output: ?*Inst = null, - inputs: []*Inst = &[0]*Inst{}, - clobbers: []*Inst = &[0]*Inst{}, - args: []*Inst = &[0]*Inst{}, - }, - }; - - pub const Unreachable = struct { - base: Inst = Inst{ .tag = .@"unreachable" }, - - positionals: struct {}, - kw_args: struct {}, - }; - - pub const Fn = struct { - base: Inst = Inst{ .tag = .@"fn" }, - - positionals: struct { - body: Body, - }, - kw_args: struct { - cc: std.builtin.CallingConvention = .Unspecified, - }, - - pub const Body = struct { - instructions: []*Inst, - }; - }; - - pub const Export = struct { - base: Inst = Inst{ .tag = .@"export" }, - - positionals: struct { - symbol_name: *Inst, - value: *Inst, - }, - kw_args: struct {}, - }; }; -pub const ErrorMsg = struct { - byte_offset: usize, - msg: []const u8, -}; - -pub const Tree = struct { - decls: []*Inst, - errors: []ErrorMsg, - - pub fn deinit(self: *Tree) void { - // TODO resource deallocation - self.* = undefined; - } - - /// This is a debugging utility for rendering the tree to stderr. - pub fn dump(self: Tree) void { - self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {}; - } - - const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body }); - - pub fn writeToStream(self: Tree, allocator: *Allocator, stream: var) !void { - // First, build a map of *Inst to @ or % indexes - var inst_table = InstPtrTable.init(allocator); - defer inst_table.deinit(); - - try inst_table.ensureCapacity(self.decls.len); - - for (self.decls) |decl, decl_i| { - try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null }); - - if (decl.cast(Inst.Fn)) |fn_inst| { - for (fn_inst.positionals.body.instructions) |inst, inst_i| { - try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body }); - } - } - } - - for (self.decls) |decl, i| { - try stream.print("@{} ", .{i}); - try self.writeInstToStream(stream, decl, &inst_table); - try stream.writeByte('\n'); - } - } - - fn writeInstToStream( - self: Tree, - stream: var, - decl: *Inst, - inst_table: *const InstPtrTable, - ) @TypeOf(stream).Error!void { - // TODO I tried implementing this with an inline for loop and hit a compiler bug - switch (decl.tag) { - .constant => return self.writeInstToStreamGeneric(stream, .constant, decl, inst_table), - .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table), - .fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table), - .deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table), - .@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table), - .@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table), - .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table), - .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table), - } - } - - fn writeInstToStreamGeneric( - self: Tree, - stream: var, - comptime inst_tag: Inst.Tag, - base: *Inst, - inst_table: *const InstPtrTable, - ) !void { - const SpecificInst = Inst.TagToType(inst_tag); - const inst = @fieldParentPtr(SpecificInst, "base", base); - if (@hasField(SpecificInst, "ty")) { - try stream.print(": {} ", .{inst.ty}); - } - if (inst_tag == .constant) { - if (inst.positionals.value.cast(Value.Payload.Bytes)) |bytes_value| { - try stream.writeAll("= "); - return std.zig.renderStringLiteral(bytes_value.data, stream); - } else if (inst.positionals.value.cast(Value.Payload.Int_u64)) |v| { - return stream.print("= {}", .{v.int}); - } else if (inst.positionals.value.cast(Value.Payload.Int_i64)) |v| { - return stream.print("= {}", .{v.int}); - } - } - const Positionals = @TypeOf(inst.positionals); - try stream.writeAll("= " ++ @tagName(inst_tag) ++ "("); - const pos_fields = @typeInfo(Positionals).Struct.fields; - inline for (pos_fields) |arg_field, i| { - if (i != 0) { - try stream.writeAll(", "); - } - try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table); - } - - comptime var need_comma = pos_fields.len != 0; - const KW_Args = @TypeOf(inst.kw_args); - inline for (@typeInfo(KW_Args).Struct.fields) |arg_field, i| { - if (need_comma) { - try stream.writeAll(",\n "); - } - if (@typeInfo(arg_field.field_type) == .Optional) { - if (@field(inst.kw_args, arg_field.name)) |non_optional| { - try stream.print("{}=", .{arg_field.name}); - try self.writeParamToStream(stream, non_optional, inst_table); - need_comma = true; - } - } else { - try stream.print("{}=", .{arg_field.name}); - try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table); - need_comma = true; - } - } - - try stream.writeByte(')'); - } - - fn writeParamToStream(self: Tree, stream: var, param: var, inst_table: *const InstPtrTable) !void { - if (@typeInfo(@TypeOf(param)) == .Enum) { - return stream.writeAll(@tagName(param)); - } - switch (@TypeOf(param)) { - Value => { - try stream.print("{}", .{param}); - }, - *Inst => return self.writeInstParamToStream(stream, param, inst_table), - []*Inst => { - try stream.writeByte('['); - for (param) |inst, i| { - if (i != 0) { - try stream.writeAll(", "); - } - try self.writeInstParamToStream(stream, inst, inst_table); - } - try stream.writeByte(']'); - }, - Inst.Fn.Body => { - try stream.writeAll("{\n"); - for (param.instructions) |inst, i| { - try stream.print(" %{} ", .{i}); - try self.writeInstToStream(stream, inst, inst_table); - try stream.writeByte('\n'); - } - try stream.writeByte('}'); - }, - bool => return stream.writeByte("01"[@boolToInt(param)]), - else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)), - } - } - - fn writeInstParamToStream(self: Tree, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void { - const info = inst_table.getValue(inst).?; - const prefix = if (info.fn_body == null) "@" else "%"; - try stream.print("{}{}", .{ prefix, info.index }); - } -}; - -const ParseContext = struct { +const Analyze = struct { allocator: *Allocator, - i: usize, - source: []const u8, + old_tree: *const Module, errors: std.ArrayList(ErrorMsg), decls: std.ArrayList(*Inst), - global_name_map: *std.StringHashMap(usize), + + const NewInst = struct { + ptr: *Inst, + }; }; -pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!Tree { - var global_name_map = std.StringHashMap(usize).init(allocator); - defer global_name_map.deinit(); - - var ctx: ParseContext = .{ +pub fn analyze(allocator: *Allocator, old_tree: Module) !Module { + var ctx = Analyze{ .allocator = allocator, - .i = 0, - .source = source, + .old_tree = &old_tree, .decls = std.ArrayList(*Inst).init(allocator), .errors = std.ArrayList(ErrorMsg).init(allocator), - .global_name_map = &global_name_map, + .inst_table = std.HashMap(*Inst, Analyze.InstData).init(allocator), }; - parseRoot(&ctx) catch |err| switch (err) { - error.ParseFailure => { + defer ctx.decls.deinit(); + defer ctx.errors.deinit(); + defer inst_table.deinit(); + + analyzeRoot(&ctx) catch |err| switch (err) { + error.AnalyzeFailure => { assert(ctx.errors.items.len != 0); }, else => |e| return e, }; - return Tree{ + return Module{ .decls = ctx.decls.toOwnedSlice(), .errors = ctx.errors.toOwnedSlice(), }; } -pub fn parseRoot(ctx: *ParseContext) !void { - // The IR format is designed so that it can be tokenized and parsed at the same time. - while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) { - ';' => _ = try skipToAndOver(ctx, '\n'), - '@' => { - ctx.i += 1; - const ident = try skipToAndOver(ctx, ' '); - const opt_type = try parseOptionalType(ctx); - const inst = try parseInstruction(ctx, opt_type, null); - const ident_index = ctx.decls.items.len; - if (try ctx.global_name_map.put(ident, ident_index)) |_| { - return parseError(ctx, "redefinition of identifier '{}'", .{ident}); - } - try ctx.decls.append(inst); - continue; - }, - ' ', '\n' => continue, - else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}), - }; -} - -fn eatByte(ctx: *ParseContext, byte: u8) bool { - if (ctx.i >= ctx.source.len) return false; - if (ctx.source[ctx.i] != byte) return false; - ctx.i += 1; - return true; -} - -fn skipSpace(ctx: *ParseContext) void { - while (ctx.i < ctx.source.len and (ctx.source[ctx.i] == ' ' or ctx.source[ctx.i] == '\n')) { - ctx.i += 1; - } -} - -fn requireEatBytes(ctx: *ParseContext, bytes: []const u8) !void { - if (ctx.i + bytes.len > ctx.source.len) - return parseError(ctx, "unexpected EOF", .{}); - if (!mem.eql(u8, ctx.source[ctx.i..][0..bytes.len], bytes)) - return parseError(ctx, "expected '{}'", .{bytes}); - ctx.i += bytes.len; -} - -fn skipToAndOver(ctx: *ParseContext, byte: u8) ![]const u8 { - const start_i = ctx.i; - while (ctx.i < ctx.source.len) : (ctx.i += 1) { - if (ctx.source[ctx.i] == byte) { - const result = ctx.source[start_i..ctx.i]; - ctx.i += 1; - return result; +fn analyzeRoot(ctx: *Analyze) !void { + for (old_tree.decls) |decl| { + if (decl.cast(Inst.Export)) |export_inst| { + try analyzeExport(ctx, export_inst); } } - return parseError(ctx, "unexpected EOF", .{}); } -fn parseError(ctx: *ParseContext, comptime format: []const u8, args: var) error{ ParseFailure, OutOfMemory } { - const msg = try std.fmt.allocPrint(ctx.allocator, format, args); - (try ctx.errors.addOne()).* = .{ - .byte_offset = ctx.i, - .msg = msg, - }; - return error.ParseFailure; -} - -/// Regardless of whether a `Type` is returned, it skips past the '='. -fn parseOptionalType(ctx: *ParseContext) !?Type { - skipSpace(ctx); - if (eatByte(ctx, ':')) { - const type_text_untrimmed = try skipToAndOver(ctx, '='); - skipSpace(ctx); - const type_text = mem.trim(u8, type_text_untrimmed, " \n"); - if (mem.eql(u8, type_text, "usize")) { - return Type.initTag(.int_usize); - } else if (mem.eql(u8, type_text, "noreturn")) { - return Type.initTag(.no_return); - } else { - return parseError(ctx, "TODO parse type '{}'", .{type_text}); - } - } else { - skipSpace(ctx); - try requireEatBytes(ctx, "="); - skipSpace(ctx); - return null; - } -} - -fn parseInstruction( - ctx: *ParseContext, - opt_type: ?Type, - body_ctx: ?*BodyContext, -) error{ OutOfMemory, ParseFailure }!*Inst { - switch (ctx.source[ctx.i]) { - '"' => return parseStringLiteralConst(ctx, opt_type), - '0'...'9' => return parseIntegerLiteralConst(ctx, opt_type), - else => {}, - } - const fn_name = try skipToAndOver(ctx, '('); - inline for (@typeInfo(Inst.Tag).Enum.fields) |field| { - if (mem.eql(u8, field.name, fn_name)) { - const tag = @field(Inst.Tag, field.name); - return parseInstructionGeneric(ctx, field.name, Inst.TagToType(tag), opt_type, body_ctx); - } - } - return parseError(ctx, "unknown instruction '{}'", .{fn_name}); -} - -fn parseInstructionGeneric( - ctx: *ParseContext, - comptime fn_name: []const u8, - comptime InstType: type, - opt_type: ?Type, - body_ctx: ?*BodyContext, -) !*Inst { - const inst_specific = try ctx.allocator.create(InstType); - inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?; - - if (@hasField(InstType, "ty")) { - inst_specific.ty = opt_type orelse { - return parseError(ctx, "instruction '" ++ fn_name ++ "' requires type", .{}); - }; - } - - const Positionals = @TypeOf(inst_specific.positionals); - inline for (@typeInfo(Positionals).Struct.fields) |arg_field| { - if (ctx.source[ctx.i] == ',') { - ctx.i += 1; - skipSpace(ctx); - } else if (ctx.source[ctx.i] == ')') { - return parseError(ctx, "expected positional parameter '{}'", .{arg_field.name}); - } - @field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric( - ctx, - arg_field.field_type, - body_ctx, - ); - skipSpace(ctx); - } - - const KW_Args = @TypeOf(inst_specific.kw_args); - inst_specific.kw_args = .{}; // assign defaults - skipSpace(ctx); - while (eatByte(ctx, ',')) { - skipSpace(ctx); - const name = try skipToAndOver(ctx, '='); - inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| { - const field_name = arg_field.name; - if (mem.eql(u8, name, field_name)) { - const NonOptional = switch (@typeInfo(arg_field.field_type)) { - .Optional => |info| info.child, - else => arg_field.field_type, - }; - @field(inst_specific.kw_args, field_name) = try parseParameterGeneric(ctx, NonOptional, body_ctx); - break; - } - } else { - return parseError(ctx, "unrecognized keyword parameter: '{}'", .{name}); - } - skipSpace(ctx); - } - try requireEatBytes(ctx, ")"); - - return &inst_specific.base; -} - -fn parseParameterGeneric(ctx: *ParseContext, comptime T: type, body_ctx: ?*BodyContext) !T { - if (@typeInfo(T) == .Enum) { - const start = ctx.i; - while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) { - ' ', '\n', ',', ')' => { - const enum_name = ctx.source[start..ctx.i]; - return std.meta.stringToEnum(T, enum_name) orelse { - return parseError(ctx, "tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) }); - }; - }, - else => continue, - }; - return parseError(ctx, "unexpected EOF in enum parameter", .{}); - } - switch (T) { - Inst.Fn.Body => return parseBody(ctx), - bool => { - const bool_value = switch (ctx.source[ctx.i]) { - '0' => false, - '1' => true, - else => |byte| return parseError(ctx, "expected '0' or '1' for boolean value, found {c}", .{byte}), - }; - ctx.i += 1; - return bool_value; - }, - []*Inst => { - try requireEatBytes(ctx, "["); - skipSpace(ctx); - if (eatByte(ctx, ']')) return &[0]*Inst{}; - - var instructions = std.ArrayList(*Inst).init(ctx.allocator); - defer instructions.deinit(); - while (true) { - skipSpace(ctx); - try instructions.append(try parseParameterInst(ctx, body_ctx)); - skipSpace(ctx); - if (!eatByte(ctx, ',')) break; - } - try requireEatBytes(ctx, "]"); - return instructions.toOwnedSlice(); - }, - *Inst => return parseParameterInst(ctx, body_ctx), - Value => return parseError(ctx, "TODO implement parseParameterGeneric for type Value", .{}), - else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)), - } - return parseError(ctx, "TODO parse parameter {}", .{@typeName(T)}); -} - -fn parseParameterInst(ctx: *ParseContext, body_ctx: ?*BodyContext) !*Inst { - const local_ref = switch (ctx.source[ctx.i]) { - '@' => false, - '%' => true, - '"' => { - const str_lit_inst = try parseStringLiteralConst(ctx, null); - try ctx.decls.append(str_lit_inst); - return str_lit_inst; - }, - else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}), - }; - const map = if (local_ref) - if (body_ctx) |bc| - &bc.name_map - else - return parseError(ctx, "referencing a % instruction in global scope", .{}) - else - ctx.global_name_map; - - ctx.i += 1; - const name_start = ctx.i; - while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) { - ' ', '\n', ',', ')', ']' => break, - else => continue, - }; - const ident = ctx.source[name_start..ctx.i]; - const kv = map.get(ident) orelse { - const bad_name = ctx.source[name_start - 1 .. ctx.i]; - ctx.i = name_start - 1; - return parseError(ctx, "unrecognized identifier: {}", .{bad_name}); - }; - if (local_ref) { - return body_ctx.?.instructions.items[kv.value]; - } else { - return ctx.decls.items[kv.value]; - } -} - -const BodyContext = struct { - instructions: std.ArrayList(*Inst), - name_map: std.StringHashMap(usize), -}; - -fn parseBody(ctx: *ParseContext) !Inst.Fn.Body { - var body_context = BodyContext{ - .instructions = std.ArrayList(*Inst).init(ctx.allocator), - .name_map = std.StringHashMap(usize).init(ctx.allocator), - }; - defer body_context.instructions.deinit(); - defer body_context.name_map.deinit(); - - try requireEatBytes(ctx, "{"); - skipSpace(ctx); - - while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) { - ';' => _ = try skipToAndOver(ctx, '\n'), - '%' => { - ctx.i += 1; - const ident = try skipToAndOver(ctx, ' '); - const opt_type = try parseOptionalType(ctx); - const inst = try parseInstruction(ctx, opt_type, &body_context); - const ident_index = body_context.instructions.items.len; - if (try body_context.name_map.put(ident, ident_index)) |_| { - return parseError(ctx, "redefinition of identifier '{}'", .{ident}); - } - try body_context.instructions.append(inst); - continue; - }, - ' ', '\n' => continue, - '}' => { - ctx.i += 1; - break; - }, - else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}), +fn analyzeExport(ctx: *Analyze, export_inst: *Inst.Export) !void { + const old_decl = export_inst.positionals.value; + const new_info = ctx.inst_table.get(old_exp_target) orelse blk: { + const new_decl = try analyzeDecl(ctx, old_decl); + const new_info: Analyze.NewInst = .{ .ptr = new_decl }; + try ctx.inst_table.put(old_decl, new_info); + break :blk new_info; }; - return Inst.Fn.Body{ - .instructions = body_context.instructions.toOwnedSlice(), - }; -} - -fn parseStringLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst { - const start = ctx.i; - ctx.i += 1; // skip over '"' - - while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) { - '"' => { - ctx.i += 1; - const span = ctx.source[start..ctx.i]; - var bad_index: usize = undefined; - const parsed = std.zig.parseStringLiteral(ctx.allocator, span, &bad_index) catch |err| switch (err) { - error.InvalidCharacter => { - ctx.i = start + bad_index; - const bad_byte = ctx.source[ctx.i]; - return parseError(ctx, "invalid string literal character: '{c}'\n", .{bad_byte}); - }, - else => |e| return e, - }; - const const_inst = try ctx.allocator.create(Inst.Constant); - errdefer ctx.allocator.destroy(const_inst); - - const bytes_payload = try ctx.allocator.create(Value.Payload.Bytes); - errdefer ctx.allocator.destroy(bytes_payload); - bytes_payload.* = .{ .data = parsed }; - - const ty = opt_type orelse blk: { - const array_payload = try ctx.allocator.create(Type.Payload.Array_u8_Sentinel0); - errdefer ctx.allocator.destroy(array_payload); - array_payload.* = .{ .len = parsed.len }; - - const ty_payload = try ctx.allocator.create(Type.Payload.SingleConstPointer); - errdefer ctx.allocator.destroy(ty_payload); - ty_payload.* = .{ .pointee_type = Type.initPayload(&array_payload.base) }; - - break :blk Type.initPayload(&ty_payload.base); - }; - - const_inst.* = .{ - .ty = ty, - .positionals = .{ .value = Value.initPayload(&bytes_payload.base) }, - .kw_args = .{}, - }; - return &const_inst.base; - }, - '\\' => { - ctx.i += 1; - if (ctx.i >= ctx.source.len) break; - continue; - }, - else => continue, - }; - return parseError(ctx, "unexpected EOF in string literal", .{}); -} - -fn parseIntegerLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst { - const start = ctx.i; - while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) { - '0'...'9' => continue, - else => break, - }; - const number_text = ctx.source[start..ctx.i]; - const number = std.fmt.parseInt(u64, number_text, 10) catch |err| switch (err) { - error.Overflow => return parseError(ctx, "TODO handle big integers", .{}), - error.InvalidCharacter => return parseError(ctx, "invalid integer literal", .{}), - }; - - const int_payload = try ctx.allocator.create(Value.Payload.Int_u64); - errdefer ctx.allocator.destroy(int_payload); - int_payload.* = .{ .int = number }; - - const const_inst = try ctx.allocator.create(Inst.Constant); - errdefer ctx.allocator.destroy(const_inst); - - const_inst.* = .{ - .ty = opt_type orelse Type.initTag(.int_comptime), - .positionals = .{ .value = Value.initPayload(&int_payload.base) }, - .kw_args = .{}, - }; - return &const_inst.base; + //const exp_type = new_info.ptr.ty(); + //switch (exp_type.zigTypeTag()) { + // .Fn => { + // if () |kv| { + // kv.value + // } + // return analyzeExportFn(ctx, exp_target.cast(Inst., + // }, + // else => return ctx.fail("unable to export type '{}'", .{exp_type}), + //} } pub fn main() anyerror!void { @@ -703,9 +114,9 @@ pub fn main() anyerror!void { const src_path = args[1]; const debug_error_trace = true; - const source = try std.fs.cwd().readFileAlloc(allocator, src_path, std.math.maxInt(u32)); + const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0); - var tree = try parse(allocator, source); + var tree = try text.parse(allocator, source); defer tree.deinit(); if (tree.errors.len != 0) { @@ -719,8 +130,19 @@ pub fn main() anyerror!void { tree.dump(); - //const new_tree = try semanticallyAnalyze(tree); + //const new_tree = try analyze(allocator, tree); //defer new_tree.deinit(); + + //if (new_tree.errors.len != 0) { + // for (new_tree.errors) |err_msg| { + // const loc = findLineColumn(source, err_msg.byte_offset); + // std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); + // } + // if (debug_error_trace) return error.ParseFailure; + // std.process.exit(1); + //} + + //new_tree.dump(); } fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } { @@ -741,4 +163,4 @@ fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, } // Performance optimization ideas: -// * make the source code sentinel-terminated, so that all the checks against the length can be skipped +// * when analyzing use a field in the Inst instead of HashMap to track corresponding instructions diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig new file mode 100644 index 0000000000..e5169010f3 --- /dev/null +++ b/src-self-hosted/ir/text.zig @@ -0,0 +1,712 @@ +//! This file has to do with parsing and rendering the ZIR text format. +const std = @import("std"); +const mem = std.mem; +const Allocator = std.mem.Allocator; +const Value = @import("../value.zig").Value; +const assert = std.debug.assert; +const ir = @import("../ir.zig"); +const BigInt = std.math.big.Int; + +/// These are instructions that correspond to the ZIR text format. See `ir.Inst` for +/// in-memory, analyzed instructions with types and values. +pub const Inst = struct { + tag: Tag, + + /// These names are used directly as the instruction names in the text format. + pub const Tag = enum { + str, + int, + ptrtoint, + fieldptr, + deref, + as, + @"asm", + @"unreachable", + @"fn", + @"export", + primitive, + fntype, + }; + + pub fn TagToType(tag: Tag) type { + return switch (tag) { + .str => Str, + .int => Int, + .ptrtoint => PtrToInt, + .fieldptr => FieldPtr, + .deref => Deref, + .as => As, + .@"asm" => Assembly, + .@"unreachable" => Unreachable, + .@"fn" => Fn, + .@"export" => Export, + .primitive => Primitive, + .fntype => FnType, + }; + } + + pub fn cast(base: *Inst, comptime T: type) ?*T { + const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag; + if (base.tag != expected_tag) + return null; + + return @fieldParentPtr(T, "base", base); + } + + pub const Str = struct { + base: Inst = Inst{ .tag = .str }, + + positionals: struct { + bytes: []u8, + }, + kw_args: struct {}, + }; + + pub const Int = struct { + base: Inst = Inst{ .tag = .int }, + + positionals: struct { + int: BigInt, + }, + kw_args: struct {}, + }; + + pub const PtrToInt = struct { + base: Inst = Inst{ .tag = .ptrtoint }, + + positionals: struct { + ptr: *Inst, + }, + kw_args: struct {}, + }; + + pub const FieldPtr = struct { + base: Inst = Inst{ .tag = .fieldptr }, + + positionals: struct { + object_ptr: *Inst, + field_name: *Inst, + }, + kw_args: struct {}, + }; + + pub const Deref = struct { + base: Inst = Inst{ .tag = .deref }, + + positionals: struct { + ptr: *Inst, + }, + kw_args: struct {}, + }; + + pub const As = struct { + base: Inst = Inst{ .tag = .as }, + + positionals: struct { + dest_type: *Inst, + value: *Inst, + }, + kw_args: struct {}, + }; + + pub const Assembly = struct { + base: Inst = Inst{ .tag = .@"asm" }, + + positionals: struct { + asm_source: *Inst, + return_type: *Inst, + }, + kw_args: struct { + @"volatile": bool = false, + output: ?*Inst = null, + inputs: []*Inst = &[0]*Inst{}, + clobbers: []*Inst = &[0]*Inst{}, + args: []*Inst = &[0]*Inst{}, + }, + }; + + pub const Unreachable = struct { + base: Inst = Inst{ .tag = .@"unreachable" }, + + positionals: struct {}, + kw_args: struct {}, + }; + + pub const Fn = struct { + base: Inst = Inst{ .tag = .@"fn" }, + + positionals: struct { + fn_type: *Inst, + body: Body, + }, + kw_args: struct {}, + + pub const Body = struct { + instructions: []*Inst, + }; + }; + + pub const Export = struct { + base: Inst = Inst{ .tag = .@"export" }, + + positionals: struct { + symbol_name: *Inst, + value: *Inst, + }, + kw_args: struct {}, + }; + + pub const Primitive = struct { + base: Inst = Inst{ .tag = .primitive }, + + positionals: struct { + tag: BuiltinType, + }, + kw_args: struct {}, + + pub const BuiltinType = enum { + @"isize", + @"usize", + @"c_short", + @"c_ushort", + @"c_int", + @"c_uint", + @"c_long", + @"c_ulong", + @"c_longlong", + @"c_ulonglong", + @"c_longdouble", + @"c_void", + @"f16", + @"f32", + @"f64", + @"f128", + @"bool", + @"void", + @"noreturn", + @"type", + @"anyerror", + @"comptime_int", + @"comptime_float", + }; + }; + + pub const FnType = struct { + base: Inst = Inst{ .tag = .fntype }, + + positionals: struct { + param_types: []*Inst, + return_type: *Inst, + }, + kw_args: struct { + cc: std.builtin.CallingConvention = .Unspecified, + }, + }; +}; + +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, +}; + +pub const Module = struct { + decls: []*Inst, + errors: []ErrorMsg, + + pub fn deinit(self: *Module) void { + // TODO resource deallocation + self.* = undefined; + } + + /// This is a debugging utility for rendering the tree to stderr. + pub fn dump(self: Module) void { + self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {}; + } + + const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body }); + + pub fn writeToStream(self: Module, allocator: *Allocator, stream: var) !void { + // First, build a map of *Inst to @ or % indexes + var inst_table = InstPtrTable.init(allocator); + defer inst_table.deinit(); + + try inst_table.ensureCapacity(self.decls.len); + + for (self.decls) |decl, decl_i| { + try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null }); + + if (decl.cast(Inst.Fn)) |fn_inst| { + for (fn_inst.positionals.body.instructions) |inst, inst_i| { + try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body }); + } + } + } + + for (self.decls) |decl, i| { + try stream.print("@{} ", .{i}); + try self.writeInstToStream(stream, decl, &inst_table); + try stream.writeByte('\n'); + } + } + + fn writeInstToStream( + self: Module, + stream: var, + decl: *Inst, + inst_table: *const InstPtrTable, + ) @TypeOf(stream).Error!void { + // TODO I tried implementing this with an inline for loop and hit a compiler bug + switch (decl.tag) { + .str => return self.writeInstToStreamGeneric(stream, .str, decl, inst_table), + .int => return self.writeInstToStreamGeneric(stream, .int, decl, inst_table), + .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table), + .fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table), + .deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table), + .as => return self.writeInstToStreamGeneric(stream, .as, decl, inst_table), + .@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table), + .@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table), + .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table), + .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table), + .primitive => return self.writeInstToStreamGeneric(stream, .primitive, decl, inst_table), + .fntype => return self.writeInstToStreamGeneric(stream, .fntype, decl, inst_table), + } + } + + fn writeInstToStreamGeneric( + self: Module, + stream: var, + comptime inst_tag: Inst.Tag, + base: *Inst, + inst_table: *const InstPtrTable, + ) !void { + const SpecificInst = Inst.TagToType(inst_tag); + const inst = @fieldParentPtr(SpecificInst, "base", base); + const Positionals = @TypeOf(inst.positionals); + try stream.writeAll("= " ++ @tagName(inst_tag) ++ "("); + const pos_fields = @typeInfo(Positionals).Struct.fields; + inline for (pos_fields) |arg_field, i| { + if (i != 0) { + try stream.writeAll(", "); + } + try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table); + } + + comptime var need_comma = pos_fields.len != 0; + const KW_Args = @TypeOf(inst.kw_args); + inline for (@typeInfo(KW_Args).Struct.fields) |arg_field, i| { + if (need_comma) { + try stream.writeAll(", "); + } + if (@typeInfo(arg_field.field_type) == .Optional) { + if (@field(inst.kw_args, arg_field.name)) |non_optional| { + try stream.print("{}=", .{arg_field.name}); + try self.writeParamToStream(stream, non_optional, inst_table); + need_comma = true; + } + } else { + try stream.print("{}=", .{arg_field.name}); + try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table); + need_comma = true; + } + } + + try stream.writeByte(')'); + } + + fn writeParamToStream(self: Module, stream: var, param: var, inst_table: *const InstPtrTable) !void { + if (@typeInfo(@TypeOf(param)) == .Enum) { + return stream.writeAll(@tagName(param)); + } + switch (@TypeOf(param)) { + Value => return stream.print("{}", .{param}), + *Inst => return self.writeInstParamToStream(stream, param, inst_table), + []*Inst => { + try stream.writeByte('['); + for (param) |inst, i| { + if (i != 0) { + try stream.writeAll(", "); + } + try self.writeInstParamToStream(stream, inst, inst_table); + } + try stream.writeByte(']'); + }, + Inst.Fn.Body => { + try stream.writeAll("{\n"); + for (param.instructions) |inst, i| { + try stream.print(" %{} ", .{i}); + try self.writeInstToStream(stream, inst, inst_table); + try stream.writeByte('\n'); + } + try stream.writeByte('}'); + }, + bool => return stream.writeByte("01"[@boolToInt(param)]), + []u8 => return std.zig.renderStringLiteral(param, stream), + BigInt => return stream.print("{}", .{param}), + else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)), + } + } + + fn writeInstParamToStream(self: Module, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void { + const info = inst_table.getValue(inst).?; + const prefix = if (info.fn_body == null) "@" else "%"; + try stream.print("{}{}", .{ prefix, info.index }); + } +}; + +pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module { + var global_name_map = std.StringHashMap(usize).init(allocator); + defer global_name_map.deinit(); + + var parser: Parser = .{ + .allocator = allocator, + .i = 0, + .source = source, + .decls = std.ArrayList(*Inst).init(allocator), + .errors = std.ArrayList(ErrorMsg).init(allocator), + .global_name_map = &global_name_map, + }; + parser.parseRoot() catch |err| switch (err) { + error.ParseFailure => { + assert(parser.errors.items.len != 0); + }, + else => |e| return e, + }; + return Module{ + .decls = parser.decls.toOwnedSlice(), + .errors = parser.errors.toOwnedSlice(), + }; +} + +const Parser = struct { + allocator: *Allocator, + i: usize, + source: [:0]const u8, + errors: std.ArrayList(ErrorMsg), + decls: std.ArrayList(*Inst), + global_name_map: *std.StringHashMap(usize), + + const Body = struct { + instructions: std.ArrayList(*Inst), + name_map: std.StringHashMap(usize), + }; + + fn parseBody(self: *Parser) !Inst.Fn.Body { + var body_context = Body{ + .instructions = std.ArrayList(*Inst).init(self.allocator), + .name_map = std.StringHashMap(usize).init(self.allocator), + }; + defer body_context.instructions.deinit(); + defer body_context.name_map.deinit(); + + try requireEatBytes(self, "{"); + skipSpace(self); + + while (true) : (self.i += 1) switch (self.source[self.i]) { + ';' => _ = try skipToAndOver(self, '\n'), + '%' => { + self.i += 1; + const ident = try skipToAndOver(self, ' '); + skipSpace(self); + try requireEatBytes(self, "="); + skipSpace(self); + const inst = try parseInstruction(self, &body_context); + const ident_index = body_context.instructions.items.len; + if (try body_context.name_map.put(ident, ident_index)) |_| { + return self.fail("redefinition of identifier '{}'", .{ident}); + } + try body_context.instructions.append(inst); + continue; + }, + ' ', '\n' => continue, + '}' => { + self.i += 1; + break; + }, + else => |byte| return self.failByte(byte), + }; + + return Inst.Fn.Body{ + .instructions = body_context.instructions.toOwnedSlice(), + }; + } + + fn parseStringLiteral(self: *Parser) ![]u8 { + const start = self.i; + try self.requireEatBytes("\""); + + while (true) : (self.i += 1) switch (self.source[self.i]) { + '"' => { + self.i += 1; + const span = self.source[start..self.i]; + var bad_index: usize = undefined; + const parsed = std.zig.parseStringLiteral(self.allocator, span, &bad_index) catch |err| switch (err) { + error.InvalidCharacter => { + self.i = start + bad_index; + const bad_byte = self.source[self.i]; + return self.fail("invalid string literal character: '{c}'\n", .{bad_byte}); + }, + else => |e| return e, + }; + return parsed; + }, + '\\' => { + self.i += 1; + continue; + }, + 0 => return self.failByte(0), + else => continue, + }; + } + + fn parseIntegerLiteral(self: *Parser) !BigInt { + const start = self.i; + if (self.source[self.i] == '-') self.i += 1; + while (true) : (self.i += 1) switch (self.source[self.i]) { + '0'...'9' => continue, + else => break, + }; + const number_text = self.source[start..self.i]; + var result = try BigInt.init(self.allocator); + result.setString(10, number_text) catch |err| { + self.i = start; + switch (err) { + error.InvalidBase => unreachable, + error.InvalidCharForDigit => return self.fail("invalid digit in integer literal", .{}), + error.DigitTooLargeForBase => return self.fail("digit too large in integer literal", .{}), + else => |e| return e, + } + }; + return result; + } + + fn parseRoot(self: *Parser) !void { + // The IR format is designed so that it can be tokenized and parsed at the same time. + while (true) : (self.i += 1) switch (self.source[self.i]) { + ';' => _ = try skipToAndOver(self, '\n'), + '@' => { + self.i += 1; + const ident = try skipToAndOver(self, ' '); + skipSpace(self); + try requireEatBytes(self, "="); + skipSpace(self); + const inst = try parseInstruction(self, null); + const ident_index = self.decls.items.len; + if (try self.global_name_map.put(ident, ident_index)) |_| { + return self.fail("redefinition of identifier '{}'", .{ident}); + } + try self.decls.append(inst); + continue; + }, + ' ', '\n' => continue, + 0 => break, + else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}), + }; + } + + fn eatByte(self: *Parser, byte: u8) bool { + if (self.source[self.i] != byte) return false; + self.i += 1; + return true; + } + + fn skipSpace(self: *Parser) void { + while (self.source[self.i] == ' ' or self.source[self.i] == '\n') { + self.i += 1; + } + } + + fn requireEatBytes(self: *Parser, bytes: []const u8) !void { + const start = self.i; + for (bytes) |byte| { + if (self.source[self.i] != byte) { + self.i = start; + return self.fail("expected '{}'", .{bytes}); + } + self.i += 1; + } + } + + fn skipToAndOver(self: *Parser, byte: u8) ![]const u8 { + const start_i = self.i; + while (self.source[self.i] != 0) : (self.i += 1) { + if (self.source[self.i] == byte) { + const result = self.source[start_i..self.i]; + self.i += 1; + return result; + } + } + return self.fail("unexpected EOF", .{}); + } + + /// ParseFailure is an internal error code; handled in `parse`. + const InnerError = error{ ParseFailure, OutOfMemory }; + + fn failByte(self: *Parser, byte: u8) InnerError { + if (byte == 0) { + return self.fail("unexpected EOF", .{}); + } else { + return self.fail("unexpected byte: '{c}'", .{byte}); + } + } + + fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError { + @setCold(true); + const msg = try std.fmt.allocPrint(self.allocator, format, args); + (try self.errors.addOne()).* = .{ + .byte_offset = self.i, + .msg = msg, + }; + return error.ParseFailure; + } + + fn parseInstruction(self: *Parser, body_ctx: ?*Body) InnerError!*Inst { + const fn_name = try skipToAndOver(self, '('); + inline for (@typeInfo(Inst.Tag).Enum.fields) |field| { + if (mem.eql(u8, field.name, fn_name)) { + const tag = @field(Inst.Tag, field.name); + return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx); + } + } + return self.fail("unknown instruction '{}'", .{fn_name}); + } + + fn parseInstructionGeneric( + self: *Parser, + comptime fn_name: []const u8, + comptime InstType: type, + body_ctx: ?*Body, + ) !*Inst { + const inst_specific = try self.allocator.create(InstType); + inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?; + + if (@hasField(InstType, "ty")) { + inst_specific.ty = opt_type orelse { + return self.fail("instruction '" ++ fn_name ++ "' requires type", .{}); + }; + } + + const Positionals = @TypeOf(inst_specific.positionals); + inline for (@typeInfo(Positionals).Struct.fields) |arg_field| { + if (self.source[self.i] == ',') { + self.i += 1; + skipSpace(self); + } else if (self.source[self.i] == ')') { + return self.fail("expected positional parameter '{}'", .{arg_field.name}); + } + @field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric( + self, + arg_field.field_type, + body_ctx, + ); + skipSpace(self); + } + + const KW_Args = @TypeOf(inst_specific.kw_args); + inst_specific.kw_args = .{}; // assign defaults + skipSpace(self); + while (eatByte(self, ',')) { + skipSpace(self); + const name = try skipToAndOver(self, '='); + inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| { + const field_name = arg_field.name; + if (mem.eql(u8, name, field_name)) { + const NonOptional = switch (@typeInfo(arg_field.field_type)) { + .Optional => |info| info.child, + else => arg_field.field_type, + }; + @field(inst_specific.kw_args, field_name) = try parseParameterGeneric(self, NonOptional, body_ctx); + break; + } + } else { + return self.fail("unrecognized keyword parameter: '{}'", .{name}); + } + skipSpace(self); + } + try requireEatBytes(self, ")"); + + return &inst_specific.base; + } + + fn parseParameterGeneric(self: *Parser, comptime T: type, body_ctx: ?*Body) !T { + if (@typeInfo(T) == .Enum) { + const start = self.i; + while (true) : (self.i += 1) switch (self.source[self.i]) { + ' ', '\n', ',', ')' => { + const enum_name = self.source[start..self.i]; + return std.meta.stringToEnum(T, enum_name) orelse { + return self.fail("tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) }); + }; + }, + 0 => return self.failByte(0), + else => continue, + }; + } + switch (T) { + Inst.Fn.Body => return parseBody(self), + bool => { + const bool_value = switch (self.source[self.i]) { + '0' => false, + '1' => true, + else => |byte| return self.fail("expected '0' or '1' for boolean value, found {c}", .{byte}), + }; + self.i += 1; + return bool_value; + }, + []*Inst => { + try requireEatBytes(self, "["); + skipSpace(self); + if (eatByte(self, ']')) return &[0]*Inst{}; + + var instructions = std.ArrayList(*Inst).init(self.allocator); + defer instructions.deinit(); + while (true) { + skipSpace(self); + try instructions.append(try parseParameterInst(self, body_ctx)); + skipSpace(self); + if (!eatByte(self, ',')) break; + } + try requireEatBytes(self, "]"); + return instructions.toOwnedSlice(); + }, + *Inst => return parseParameterInst(self, body_ctx), + Value => return self.fail("TODO implement parseParameterGeneric for type Value", .{}), + []u8 => return self.parseStringLiteral(), + BigInt => return self.parseIntegerLiteral(), + else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)), + } + return self.fail("TODO parse parameter {}", .{@typeName(T)}); + } + + fn parseParameterInst(self: *Parser, body_ctx: ?*Body) !*Inst { + const local_ref = switch (self.source[self.i]) { + '@' => false, + '%' => true, + else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}), + }; + const map = if (local_ref) + if (body_ctx) |bc| + &bc.name_map + else + return self.fail("referencing a % instruction in global scope", .{}) + else + self.global_name_map; + + self.i += 1; + const name_start = self.i; + while (true) : (self.i += 1) switch (self.source[self.i]) { + 0, ' ', '\n', ',', ')', ']' => break, + else => continue, + }; + const ident = self.source[name_start..self.i]; + const kv = map.get(ident) orelse { + const bad_name = self.source[name_start - 1 .. self.i]; + self.i = name_start - 1; + return self.fail("unrecognized identifier: {}", .{bad_name}); + }; + if (local_ref) { + return body_ctx.?.instructions.items[kv.value]; + } else { + return self.decls.items[kv.value]; + } + } +}; diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 380c82ff68..6745366752 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -18,7 +18,7 @@ pub const Type = extern union { pub fn zigTypeTag(self: Type) std.builtin.TypeId { switch (self.tag()) { - .int_u8, .int_usize => return .Int, + .@"u8", .@"usize" => return .Int, .array_u8, .array_u8_sentinel_0 => return .Array, .single_const_pointer => return .Pointer, } @@ -52,10 +52,35 @@ pub const Type = extern union { var ty = self; while (true) { switch (ty.tag()) { - .no_return => return out_stream.writeAll("noreturn"), - .int_comptime => return out_stream.writeAll("comptime_int"), - .int_u8 => return out_stream.writeAll("u8"), - .int_usize => return out_stream.writeAll("usize"), + @"u8", + @"i8", + @"isize", + @"usize", + @"noreturn", + @"void", + @"c_short", + @"c_ushort", + @"c_int", + @"c_uint", + @"c_long", + @"c_ulong", + @"c_longlong", + @"c_ulonglong", + @"c_longdouble", + @"c_void", + @"f16", + @"f32", + @"f64", + @"f128", + @"bool", + @"void", + @"type", + @"anyerror", + @"comptime_int", + @"comptime_float", + @"noreturn", + => |t| return out_stream.writeAll(@tagName(t)), + .array_u8_sentinel_0 => { const payload = @fieldParentPtr(Payload.Array_u8_Sentinel0, "base", ty.ptr_otherwise); return out_stream.print("[{}:0]u8", .{payload.len}); @@ -85,17 +110,38 @@ pub const Type = extern union { /// See `zigTypeTag` for the function that corresponds to `std.builtin.TypeId`. pub const Tag = enum { // The first section of this enum are tags that require no payload. - no_return, - int_comptime, - int_u8, - int_usize, // See last_no_payload_tag below. + @"u8", + @"i8", + @"isize", + @"usize", + @"c_short", + @"c_ushort", + @"c_int", + @"c_uint", + @"c_long", + @"c_ulong", + @"c_longlong", + @"c_ulonglong", + @"c_longdouble", + @"c_void", + @"f16", + @"f32", + @"f64", + @"f128", + @"bool", + @"void", + @"type", + @"anyerror", + @"comptime_int", + @"comptime_float", + @"noreturn", // See last_no_payload_tag below. // After this, the tag requires a payload. array_u8_sentinel_0, array, single_const_pointer, - pub const last_no_payload_tag = Tag.int_usize; + pub const last_no_payload_tag = Tag.@"noreturn"; pub const no_payload_count = @enumToInt(last_no_payload_tag) + 1; }; diff --git a/test/stage2/ir.zig b/test/stage2/ir.zig index 498db3a4eb..450d8fa102 100644 --- a/test/stage2/ir.zig +++ b/test/stage2/ir.zig @@ -1,33 +1,50 @@ test "hello world IR" { exeCmp( - \\@0 = "Hello, world!\n" + \\@0 = str("Hello, world!\n") + \\@1 = primitive(void) + \\@2 = primitive(usize) + \\@3 = fntype([], @1, cc=Naked) + \\@4 = int(0) + \\@5 = int(1) + \\@6 = int(231) + \\@7 = str("len") \\ - \\@1 = fn({ - \\ %0 : usize = 1 ;SYS_write - \\ %1 : usize = 1 ;STDOUT_FILENO + \\@8 = fn(@3, { + \\ %0 = as(@2, @5) ; SYS_write + \\ %1 = as(@2, @5) ; STDOUT_FILENO \\ %2 = ptrtoint(@0) ; msg ptr - \\ %3 = fieldptr(@0, "len") ; msg len ptr + \\ %3 = fieldptr(@0, @7) ; msg len ptr \\ %4 = deref(%3) ; msg len - \\ %5 = asm("syscall", + \\ %sysoutreg = str("={rax}") + \\ %rax = str("{rax}") + \\ %rdi = str("{rdi}") + \\ %rsi = str("{rsi}") + \\ %rdx = str("{rdx}") + \\ %rcx = str("rcx") + \\ %r11 = str("r11") + \\ %memory = str("memory") + \\ %syscall = str("syscall") + \\ %5 = asm(%syscall, @2, \\ volatile=1, - \\ output="={rax}", - \\ inputs=["{rax}", "{rdi}", "{rsi}", "{rdx}"], - \\ clobbers=["rcx", "r11", "memory"], + \\ output=%sysoutreg, + \\ inputs=[%rax, %rdi, %rsi, %rdx], + \\ clobbers=[%rcx, %r11, %memory], \\ args=[%0, %1, %2, %4]) \\ - \\ %6 : usize = 231 ;SYS_exit_group - \\ %7 : usize = 0 ;exit code - \\ %8 = asm("syscall", + \\ %6 = as(@2, @6) ;SYS_exit_group + \\ %7 = as(@2, @4) ;exit code + \\ %8 = asm(%syscall, @2, \\ volatile=1, - \\ output="={rax}", - \\ inputs=["{rax}", "{rdi}"], - \\ clobbers=["rcx", "r11", "memory"], + \\ output=%sysoutreg, + \\ inputs=[%rax, %rdi], + \\ clobbers=[%rcx, %r11, %memory], \\ args=[%6, %7]) \\ \\ %9 = unreachable() - \\}, cc=naked) + \\}) \\ - \\@2 = export("_start", @1) + \\@9 = str("_start") + \\@10 = export(@9, @8) , \\Hello, world! \\