From ae080b5c217fbcfd350a5d52b8b4626a95540ab3 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Tue, 5 May 2020 15:26:36 -0400
Subject: [PATCH 01/31] readme: remove the part about stage2 until it's done

---
 README.md | 37 -------------------------------------
 1 file changed, 37 deletions(-)

diff --git a/README.md b/README.md
index 3bb9302cd2..916f2e16f3 100644
--- a/README.md
+++ b/README.md
@@ -71,40 +71,3 @@ can do about it. See that issue for a workaround you can do in the meantime.
 
 ##### Windows
 
 See https://github.com/ziglang/zig/wiki/Building-Zig-on-Windows
-
-### Stage 2: Build Self-Hosted Zig from Zig Source Code
-
-*Note: Stage 2 compiler is not complete. Beta users of Zig should use the
-Stage 1 compiler for now.*
-
-Dependencies are the same as Stage 1, except now you can use stage 1 to compile
-Zig code.
-
-```
-bin/zig build --prefix $(pwd)/stage2
-```
-
-This produces `./stage2/bin/zig` which can be used for testing and development.
-Once it is feature complete, it will be used to build stage 3 - the final compiler
-binary.
-
-### Stage 3: Rebuild Self-Hosted Zig Using the Self-Hosted Compiler
-
-*Note: Stage 2 compiler is not yet able to build Stage 3. Building Stage 3 is
-not yet supported.*
-
-Once the self-hosted compiler can build itself, this will be the actual
-compiler binary that we will install to the system. Until then, users should
-use stage 1.
-
-#### Debug / Development Build
-
-```
-./stage2/bin/zig build --prefix $(pwd)/stage3
-```
-
-#### Release / Install Build
-
-```
-./stage2/bin/zig build install -Drelease
-```

From a32d3a85d21d614e5960b9eadcd85374954b910f Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Sun, 10 May 2020 02:05:54 -0400
Subject: [PATCH 02/31] rework self-hosted compiler for incremental builds

* introduce std.ArrayListUnmanaged for when you have the allocator
  stored elsewhere
* move std.heap.ArenaAllocator implementation to its own file. extract
  the main state into std.heap.ArenaAllocator.State, which can be
  stored as an alternative to storing the entire ArenaAllocator, saving
  24 bytes per ArenaAllocator on 64-bit targets.
* std.LinkedList.Node pointer field now defaults to null.
* Rework self-hosted compiler Package API
* Delete almost all the bitrotted self-hosted compiler code. The only
  bitrotted code left is in main.zig and compilation.zig
* Add call instruction to ZIR
* self-hosted compiler ir API and link API are reworked to support a
  long-running compiler that incrementally updates declarations
* Introduce the concept of scopes to ZIR semantic analysis
* ZIR text format supports referencing named decls that are declared
  later in the file
* Figure out how memory management works for the long-running compiler
  and incremental compilation. The main roots are top-level
  declarations. There is a table of decls. The key is a cryptographic
  hash of the fully qualified decl name. Each decl has an arena
  allocator where all of the memory related to that decl is stored.
  Each code block has its own arena allocator for the lifetime of the
  block. Values that want to survive when going out of scope in a block
  must get copied into the outer block. Finally, values must get copied
  into the Decl arena to be long-lived.
* Delete the unused MemoryCell struct. Instead, comptime pointers are
  based on references to Decl structs.
* Figure out how caching works. Each Decl will store a set of other
  Decls which must be recompiled when it changes.

This branch is still work-in-progress; this commit breaks the build.
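For illustration (not part of the diff below), here is a minimal sketch of
the pattern the first two bullet points are aimed at; the `Entry`/`addDep`
names are hypothetical, but the std APIs are the ones introduced in this
commit. A struct embeds an ArrayListUnmanaged plus an ArenaAllocator.State,
and the caller supplies the allocator at each call site instead of storing
it in every container:

    const std = @import("std");

    // A decl-like struct that stores only the arena *state* (smaller than
    // a full ArenaAllocator) and an unmanaged list with no stored allocator.
    const Entry = struct {
        arena: std.heap.ArenaAllocator.State = .{},
        deps: std.ArrayListUnmanaged(u32) = .{},

        fn addDep(self: *Entry, allocator: *std.mem.Allocator, dep: u32) !void {
            // The allocator is passed in per call rather than stored per list.
            try self.deps.append(allocator, dep);
        }

        fn deinit(self: *Entry, allocator: *std.mem.Allocator) void {
            self.deps.deinit(allocator);
            // promote() re-attaches the child allocator so the arena can be freed.
            self.arena.promote(allocator).deinit();
        }
    };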
--- lib/std/array_list.zig | 243 +++++- lib/std/heap.zig | 90 +-- lib/std/heap/arena_allocator.zig | 102 +++ lib/std/linked_list.zig | 2 +- lib/std/std.zig | 4 +- src-self-hosted/Package.zig | 52 ++ src-self-hosted/c.zig | 7 - src-self-hosted/codegen.zig | 58 +- src-self-hosted/compilation.zig | 89 ++- src-self-hosted/decl.zig | 102 --- src-self-hosted/ir.zig | 1068 +++++++++++++++++-------- src-self-hosted/ir/text.zig | 182 +++-- src-self-hosted/libc_installation.zig | 1 - src-self-hosted/link.zig | 598 ++++++++------ src-self-hosted/package.zig | 31 - src-self-hosted/scope.zig | 418 ---------- src-self-hosted/test.zig | 11 +- src-self-hosted/type.zig | 63 +- src-self-hosted/util.zig | 47 -- src-self-hosted/value.zig | 186 ++--- src-self-hosted/visib.zig | 4 - 21 files changed, 1845 insertions(+), 1513 deletions(-) create mode 100644 lib/std/heap/arena_allocator.zig create mode 100644 src-self-hosted/Package.zig delete mode 100644 src-self-hosted/c.zig delete mode 100644 src-self-hosted/decl.zig delete mode 100644 src-self-hosted/package.zig delete mode 100644 src-self-hosted/scope.zig delete mode 100644 src-self-hosted/util.zig delete mode 100644 src-self-hosted/visib.zig diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig index 97fca103bb..a47b7cde41 100644 --- a/lib/std/array_list.zig +++ b/lib/std/array_list.zig @@ -8,13 +8,13 @@ const Allocator = mem.Allocator; /// A contiguous, growable list of items in memory. /// This is a wrapper around an array of T values. Initialize with `init`. pub fn ArrayList(comptime T: type) type { - return AlignedArrayList(T, null); + return ArrayListAligned(T, null); } -pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type { +pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type { if (alignment) |a| { if (a == @alignOf(T)) { - return AlignedArrayList(T, null); + return ArrayListAligned(T, null); } } return struct { @@ -76,6 +76,10 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type { }; } + pub fn toUnmanaged(self: Self) ArrayListAlignedUnmanaged(T, alignment) { + return .{ .items = self.items, .capacity = self.capacity }; + } + /// The caller owns the returned memory. ArrayList becomes empty. pub fn toOwnedSlice(self: *Self) Slice { const allocator = self.allocator; @@ -84,8 +88,8 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type { return result; } - /// Insert `item` at index `n`. Moves `list[n .. list.len]` - /// to make room. + /// Insert `item` at index `n` by moving `list[n .. list.len]` to make room. + /// This operation is O(N). pub fn insert(self: *Self, n: usize, item: T) !void { try self.ensureCapacity(self.items.len + 1); self.items.len += 1; @@ -94,8 +98,7 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type { self.items[n] = item; } - /// Insert slice `items` at index `i`. Moves - /// `list[i .. list.len]` to make room. + /// Insert slice `items` at index `i` by moving `list[i .. list.len]` to make room. /// This operation is O(N). pub fn insertSlice(self: *Self, i: usize, items: SliceConst) !void { try self.ensureCapacity(self.items.len + items.len); @@ -259,6 +262,232 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type { }; } +/// Bring-your-own allocator with every function call. +/// Initialize directly and deinitialize with `deinit` or use `toOwnedSlice`. 
+pub fn ArrayListUnmanaged(comptime T: type) type {
+    return ArrayListAlignedUnmanaged(T, null);
+}
+
+pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) type {
+    if (alignment) |a| {
+        if (a == @alignOf(T)) {
+            return ArrayListAlignedUnmanaged(T, null);
+        }
+    }
+    return struct {
+        const Self = @This();
+
+        /// Content of the ArrayList.
+        items: Slice = &[_]T{},
+        capacity: usize = 0,
+
+        pub const Slice = if (alignment) |a| ([]align(a) T) else []T;
+        pub const SliceConst = if (alignment) |a| ([]align(a) const T) else []const T;
+
+        pub fn init() Self {
+            return .{
+                .items = &[_]T{},
+                .capacity = 0,
+            };
+        }
+
+        /// Initialize with capacity to hold at least `num` elements.
+        /// Deinitialize with `deinit` or use `toOwnedSlice`.
+        pub fn initCapacity(allocator: *Allocator, num: usize) !Self {
+            var self = Self.init();
+            try self.ensureCapacity(allocator, num);
+            return self;
+        }
+
+        /// Release all allocated memory.
+        pub fn deinit(self: *Self, allocator: *Allocator) void {
+            allocator.free(self.allocatedSlice());
+            self.* = undefined;
+        }
+
+        pub fn toManaged(self: *Self, allocator: *Allocator) ArrayListAligned(T, alignment) {
+            return .{ .items = self.items, .capacity = self.capacity, .allocator = allocator };
+        }
+
+        /// The caller owns the returned memory. ArrayList becomes empty.
+        pub fn toOwnedSlice(self: *Self, allocator: *Allocator) Slice {
+            const result = allocator.shrink(self.allocatedSlice(), self.items.len);
+            self.* = Self.init();
+            return result;
+        }
+
+        /// Insert `item` at index `n`. Moves `list[n .. list.len]`
+        /// to make room.
+        pub fn insert(self: *Self, allocator: *Allocator, n: usize, item: T) !void {
+            try self.ensureCapacity(allocator, self.items.len + 1);
+            self.items.len += 1;
+
+            mem.copyBackwards(T, self.items[n + 1 .. self.items.len], self.items[n .. self.items.len - 1]);
+            self.items[n] = item;
+        }
+
+        /// Insert slice `items` at index `i`. Moves
+        /// `list[i .. list.len]` to make room.
+        /// This operation is O(N).
+        pub fn insertSlice(self: *Self, allocator: *Allocator, i: usize, items: SliceConst) !void {
+            try self.ensureCapacity(allocator, self.items.len + items.len);
+            self.items.len += items.len;
+
+            mem.copyBackwards(T, self.items[i + items.len .. self.items.len], self.items[i .. self.items.len - items.len]);
+            mem.copy(T, self.items[i .. i + items.len], items);
+        }
+
+        /// Extend the list by 1 element. Allocates more memory as necessary.
+        pub fn append(self: *Self, allocator: *Allocator, item: T) !void {
+            const new_item_ptr = try self.addOne(allocator);
+            new_item_ptr.* = item;
+        }
+
+        /// Extend the list by 1 element, asserting `self.capacity`
+        /// is sufficient to hold an additional item.
+        pub fn appendAssumeCapacity(self: *Self, item: T) void {
+            const new_item_ptr = self.addOneAssumeCapacity();
+            new_item_ptr.* = item;
+        }
+
+        /// Remove the element at index `i` from the list and return its value.
+        /// Asserts the array has at least one item.
+        /// This operation is O(N).
+        pub fn orderedRemove(self: *Self, i: usize) T {
+            const newlen = self.items.len - 1;
+            if (newlen == i) return self.pop();
+
+            const old_item = self.items[i];
+            for (self.items[i..newlen]) |*b, j| b.* = self.items[i + 1 + j];
+            self.items[newlen] = undefined;
+            self.items.len = newlen;
+            return old_item;
+        }
+
+        /// Removes the element at the specified index and returns it.
+        /// The empty slot is filled from the end of the list.
+        /// This operation is O(1).
+        pub fn swapRemove(self: *Self, i: usize) T {
+            if (self.items.len - 1 == i) return self.pop();
+
+            const old_item = self.items[i];
+            self.items[i] = self.pop();
+            return old_item;
+        }
+
+        /// Append the slice of items to the list. Allocates more
+        /// memory as necessary.
+        pub fn appendSlice(self: *Self, allocator: *Allocator, items: SliceConst) !void {
+            const oldlen = self.items.len;
+            const newlen = self.items.len + items.len;
+
+            try self.ensureCapacity(allocator, newlen);
+            self.items.len = newlen;
+            mem.copy(T, self.items[oldlen..], items);
+        }
+
+        /// Same as `append` except it returns the number of bytes written, which is always the same
+        /// as `m.len`. The purpose of this function existing is to match `std.io.OutStream` API.
+        /// This function may be called only when `T` is `u8`.
+        fn appendWrite(self: *Self, allocator: *Allocator, m: []const u8) !usize {
+            try self.appendSlice(allocator, m);
+            return m.len;
+        }
+
+        /// Append a value to the list `n` times.
+        /// Allocates more memory as necessary.
+        pub fn appendNTimes(self: *Self, allocator: *Allocator, value: T, n: usize) !void {
+            const old_len = self.items.len;
+            try self.resize(allocator, self.items.len + n);
+            mem.set(T, self.items[old_len..self.items.len], value);
+        }
+
+        /// Adjust the list's length to `new_len`.
+        /// Does not initialize added items if any.
+        pub fn resize(self: *Self, allocator: *Allocator, new_len: usize) !void {
+            try self.ensureCapacity(allocator, new_len);
+            self.items.len = new_len;
+        }
+
+        /// Reduce allocated capacity to `new_len`.
+        /// Invalidates element pointers.
+        pub fn shrink(self: *Self, allocator: *Allocator, new_len: usize) void {
+            assert(new_len <= self.items.len);
+
+            self.items = allocator.realloc(self.allocatedSlice(), new_len) catch |e| switch (e) {
+                error.OutOfMemory => { // no problem, capacity is still correct then.
+                    self.items.len = new_len;
+                    return;
+                },
+            };
+            self.capacity = new_len;
+        }
+
+        pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void {
+            var better_capacity = self.capacity;
+            if (better_capacity >= new_capacity) return;
+
+            while (true) {
+                better_capacity += better_capacity / 2 + 8;
+                if (better_capacity >= new_capacity) break;
+            }
+
+            const new_memory = try allocator.realloc(self.allocatedSlice(), better_capacity);
+            self.items.ptr = new_memory.ptr;
+            self.capacity = new_memory.len;
+        }
+
+        /// Increases the array's length to match the full capacity that is already allocated.
+        /// The new elements have `undefined` values.
+        /// This operation does not invalidate any element pointers.
+        pub fn expandToCapacity(self: *Self) void {
+            self.items.len = self.capacity;
+        }
+
+        /// Increase length by 1, returning pointer to the new item.
+        /// The returned pointer becomes invalid when the list is resized.
+        pub fn addOne(self: *Self, allocator: *Allocator) !*T {
+            const newlen = self.items.len + 1;
+            try self.ensureCapacity(allocator, newlen);
+            return self.addOneAssumeCapacity();
+        }
+
+        /// Increase length by 1, returning pointer to the new item.
+        /// Asserts that there is already space for the new item without allocating more.
+        /// The returned pointer becomes invalid when the list is resized.
+        /// This operation does not invalidate any element pointers.
+        pub fn addOneAssumeCapacity(self: *Self) *T {
+            assert(self.items.len < self.capacity);
+
+            self.items.len += 1;
+            return &self.items[self.items.len - 1];
+        }
+
+        /// Remove and return the last element from the list.
+        /// Asserts the list has at least one item.
+ /// This operation does not invalidate any element pointers. + pub fn pop(self: *Self) T { + const val = self.items[self.items.len - 1]; + self.items.len -= 1; + return val; + } + + /// Remove and return the last element from the list. + /// If the list is empty, returns `null`. + /// This operation does not invalidate any element pointers. + pub fn popOrNull(self: *Self) ?T { + if (self.items.len == 0) return null; + return self.pop(); + } + + /// For a nicer API, `items.len` is the length, not the capacity. + /// This requires "unsafe" slicing. + fn allocatedSlice(self: Self) Slice { + return self.items.ptr[0..self.capacity]; + } + }; +} + test "std.ArrayList.init" { var list = ArrayList(i32).init(testing.allocator); defer list.deinit(); diff --git a/lib/std/heap.zig b/lib/std/heap.zig index 3e00ca5d59..6bbb688ef0 100644 --- a/lib/std/heap.zig +++ b/lib/std/heap.zig @@ -11,6 +11,7 @@ const maxInt = std.math.maxInt; pub const LoggingAllocator = @import("heap/logging_allocator.zig").LoggingAllocator; pub const loggingAllocator = @import("heap/logging_allocator.zig").loggingAllocator; +pub const ArenaAllocator = @import("heap/arena_allocator.zig").ArenaAllocator; const Allocator = mem.Allocator; @@ -510,95 +511,6 @@ pub const HeapAllocator = switch (builtin.os.tag) { else => @compileError("Unsupported OS"), }; -/// This allocator takes an existing allocator, wraps it, and provides an interface -/// where you can allocate without freeing, and then free it all together. -pub const ArenaAllocator = struct { - allocator: Allocator, - - child_allocator: *Allocator, - buffer_list: std.SinglyLinkedList([]u8), - end_index: usize, - - const BufNode = std.SinglyLinkedList([]u8).Node; - - pub fn init(child_allocator: *Allocator) ArenaAllocator { - return ArenaAllocator{ - .allocator = Allocator{ - .reallocFn = realloc, - .shrinkFn = shrink, - }, - .child_allocator = child_allocator, - .buffer_list = std.SinglyLinkedList([]u8).init(), - .end_index = 0, - }; - } - - pub fn deinit(self: ArenaAllocator) void { - var it = self.buffer_list.first; - while (it) |node| { - // this has to occur before the free because the free frees node - const next_it = node.next; - self.child_allocator.free(node.data); - it = next_it; - } - } - - fn createNode(self: *ArenaAllocator, prev_len: usize, minimum_size: usize) !*BufNode { - const actual_min_size = minimum_size + @sizeOf(BufNode); - var len = prev_len; - while (true) { - len += len / 2; - len += mem.page_size - @rem(len, mem.page_size); - if (len >= actual_min_size) break; - } - const buf = try self.child_allocator.alignedAlloc(u8, @alignOf(BufNode), len); - const buf_node_slice = mem.bytesAsSlice(BufNode, buf[0..@sizeOf(BufNode)]); - const buf_node = &buf_node_slice[0]; - buf_node.* = BufNode{ - .data = buf, - .next = null, - }; - self.buffer_list.prepend(buf_node); - self.end_index = 0; - return buf_node; - } - - fn alloc(allocator: *Allocator, n: usize, alignment: u29) ![]u8 { - const self = @fieldParentPtr(ArenaAllocator, "allocator", allocator); - - var cur_node = if (self.buffer_list.first) |first_node| first_node else try self.createNode(0, n + alignment); - while (true) { - const cur_buf = cur_node.data[@sizeOf(BufNode)..]; - const addr = @ptrToInt(cur_buf.ptr) + self.end_index; - const adjusted_addr = mem.alignForward(addr, alignment); - const adjusted_index = self.end_index + (adjusted_addr - addr); - const new_end_index = adjusted_index + n; - if (new_end_index > cur_buf.len) { - cur_node = try self.createNode(cur_buf.len, n + alignment); - continue; - 
} - const result = cur_buf[adjusted_index..new_end_index]; - self.end_index = new_end_index; - return result; - } - } - - fn realloc(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) ![]u8 { - if (new_size <= old_mem.len and new_align <= new_size) { - // We can't do anything with the memory, so tell the client to keep it. - return error.OutOfMemory; - } else { - const result = try alloc(allocator, new_size, new_align); - @memcpy(result.ptr, old_mem.ptr, std.math.min(old_mem.len, result.len)); - return result; - } - } - - fn shrink(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) []u8 { - return old_mem[0..new_size]; - } -}; - pub const FixedBufferAllocator = struct { allocator: Allocator, end_index: usize, diff --git a/lib/std/heap/arena_allocator.zig b/lib/std/heap/arena_allocator.zig new file mode 100644 index 0000000000..daed17d6b3 --- /dev/null +++ b/lib/std/heap/arena_allocator.zig @@ -0,0 +1,102 @@ +const std = @import("../std.zig"); +const assert = std.debug.assert; +const mem = std.mem; +const Allocator = std.mem.Allocator; + +/// This allocator takes an existing allocator, wraps it, and provides an interface +/// where you can allocate without freeing, and then free it all together. +pub const ArenaAllocator = struct { + allocator: Allocator, + + child_allocator: *Allocator, + state: State, + + /// Inner state of ArenaAllocator. Can be stored rather than the entire ArenaAllocator + /// as a memory-saving optimization. + pub const State = struct { + buffer_list: std.SinglyLinkedList([]u8) = @as(std.SinglyLinkedList([]u8), .{}), + end_index: usize = 0, + + pub fn promote(self: State, child_allocator: *Allocator) ArenaAllocator { + return .{ + .allocator = Allocator{ + .reallocFn = realloc, + .shrinkFn = shrink, + }, + .child_allocator = child_allocator, + .state = self, + }; + } + }; + + const BufNode = std.SinglyLinkedList([]u8).Node; + + pub fn init(child_allocator: *Allocator) ArenaAllocator { + return (State{}).promote(child_allocator); + } + + pub fn deinit(self: ArenaAllocator) void { + var it = self.state.buffer_list.first; + while (it) |node| { + // this has to occur before the free because the free frees node + const next_it = node.next; + self.child_allocator.free(node.data); + it = next_it; + } + } + + fn createNode(self: *ArenaAllocator, prev_len: usize, minimum_size: usize) !*BufNode { + const actual_min_size = minimum_size + @sizeOf(BufNode); + var len = prev_len; + while (true) { + len += len / 2; + len += mem.page_size - @rem(len, mem.page_size); + if (len >= actual_min_size) break; + } + const buf = try self.child_allocator.alignedAlloc(u8, @alignOf(BufNode), len); + const buf_node_slice = mem.bytesAsSlice(BufNode, buf[0..@sizeOf(BufNode)]); + const buf_node = &buf_node_slice[0]; + buf_node.* = BufNode{ + .data = buf, + .next = null, + }; + self.state.buffer_list.prepend(buf_node); + self.state.end_index = 0; + return buf_node; + } + + fn alloc(allocator: *Allocator, n: usize, alignment: u29) ![]u8 { + const self = @fieldParentPtr(ArenaAllocator, "allocator", allocator); + + var cur_node = if (self.state.buffer_list.first) |first_node| first_node else try self.createNode(0, n + alignment); + while (true) { + const cur_buf = cur_node.data[@sizeOf(BufNode)..]; + const addr = @ptrToInt(cur_buf.ptr) + self.state.end_index; + const adjusted_addr = mem.alignForward(addr, alignment); + const adjusted_index = self.state.end_index + (adjusted_addr - addr); + const new_end_index = adjusted_index + n; + 
if (new_end_index > cur_buf.len) { + cur_node = try self.createNode(cur_buf.len, n + alignment); + continue; + } + const result = cur_buf[adjusted_index..new_end_index]; + self.state.end_index = new_end_index; + return result; + } + } + + fn realloc(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) ![]u8 { + if (new_size <= old_mem.len and new_align <= new_size) { + // We can't do anything with the memory, so tell the client to keep it. + return error.OutOfMemory; + } else { + const result = try alloc(allocator, new_size, new_align); + @memcpy(result.ptr, old_mem.ptr, std.math.min(old_mem.len, result.len)); + return result; + } + } + + fn shrink(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) []u8 { + return old_mem[0..new_size]; + } +}; diff --git a/lib/std/linked_list.zig b/lib/std/linked_list.zig index 23201dbf94..50acbb2c9f 100644 --- a/lib/std/linked_list.zig +++ b/lib/std/linked_list.zig @@ -49,7 +49,7 @@ pub fn SinglyLinkedList(comptime T: type) type { } }; - first: ?*Node, + first: ?*Node = null, /// Initialize a linked list. /// diff --git a/lib/std/std.zig b/lib/std/std.zig index cd6f347429..376c200200 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -1,6 +1,8 @@ -pub const AlignedArrayList = @import("array_list.zig").AlignedArrayList; pub const ArrayList = @import("array_list.zig").ArrayList; +pub const ArrayListAligned = @import("array_list.zig").ArrayListAligned; +pub const ArrayListAlignedUnmanaged = @import("array_list.zig").ArrayListAlignedUnmanaged; pub const ArrayListSentineled = @import("array_list_sentineled.zig").ArrayListSentineled; +pub const ArrayListUnmanaged = @import("array_list.zig").ArrayListUnmanaged; pub const AutoHashMap = @import("hash_map.zig").AutoHashMap; pub const BloomFilter = @import("bloom_filter.zig").BloomFilter; pub const BufMap = @import("buf_map.zig").BufMap; diff --git a/src-self-hosted/Package.zig b/src-self-hosted/Package.zig new file mode 100644 index 0000000000..eaf37f379d --- /dev/null +++ b/src-self-hosted/Package.zig @@ -0,0 +1,52 @@ +pub const Table = std.StringHashMap(*Package); + +root_src_dir: std.fs.Dir, +/// Relative to `root_src_dir`. +root_src_path: []const u8, +table: Table, + +/// No references to `root_src_dir` and `root_src_path` are kept. +pub fn create( + allocator: *mem.Allocator, + base_dir: std.fs.Dir, + /// Relative to `base_dir`. + root_src_dir: []const u8, + /// Relative to `root_src_dir`. 
+    root_src_path: []const u8,
+) !*Package {
+    const ptr = try allocator.create(Package);
+    errdefer allocator.destroy(ptr);
+    const root_src_path_dupe = try mem.dupe(allocator, u8, root_src_path);
+    errdefer allocator.free(root_src_path_dupe);
+    ptr.* = .{
+        .root_src_dir = try base_dir.openDir(root_src_dir, .{}),
+        .root_src_path = root_src_path_dupe,
+        .table = Table.init(allocator),
+    };
+    return ptr;
+}
+
+pub fn destroy(self: *Package) void {
+    const allocator = self.table.allocator;
+    self.root_src_dir.close();
+    allocator.free(self.root_src_path);
+    {
+        var it = self.table.iterator();
+        while (it.next()) |kv| {
+            allocator.free(kv.key);
+        }
+    }
+    self.table.deinit();
+    allocator.destroy(self);
+}
+
+pub fn add(self: *Package, name: []const u8, package: *Package) !void {
+    const name_dupe = try mem.dupe(self.table.allocator, u8, name);
+    errdefer self.table.allocator.free(name_dupe);
+    const entry = try self.table.put(name_dupe, package);
+    assert(entry == null);
+}
+
+const std = @import("std");
+const mem = std.mem;
+const assert = std.debug.assert;
diff --git a/src-self-hosted/c.zig b/src-self-hosted/c.zig
deleted file mode 100644
index ae9a886d1b..0000000000
--- a/src-self-hosted/c.zig
+++ /dev/null
@@ -1,7 +0,0 @@
-pub usingnamespace @cImport({
-    @cDefine("__STDC_CONSTANT_MACROS", "");
-    @cDefine("__STDC_LIMIT_MACROS", "");
-    @cInclude("inttypes.h");
-    @cInclude("config.h");
-    @cInclude("zig_llvm.h");
-});
diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig
index 675b8faad2..501f8717ea 100644
--- a/src-self-hosted/codegen.zig
+++ b/src-self-hosted/codegen.zig
@@ -6,38 +6,24 @@ const Type = @import("type.zig").Type;
 const Value = @import("value.zig").Value;
 const Target = std.Target;
 
-pub const ErrorMsg = struct {
-    byte_offset: usize,
-    msg: []const u8,
-};
-
-pub const Symbol = struct {
-    errors: []ErrorMsg,
-
-    pub fn deinit(self: *Symbol, allocator: *mem.Allocator) void {
-        for (self.errors) |err| {
-            allocator.free(err.msg);
-        }
-        allocator.free(self.errors);
-        self.* = undefined;
-    }
-};
-
-pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !Symbol {
+pub fn generateSymbol(
+    typed_value: ir.TypedValue,
+    module: ir.Module,
+    code: *std.ArrayList(u8),
+    errors: *std.ArrayList(ir.ErrorMsg),
+) !void {
     switch (typed_value.ty.zigTypeTag()) {
         .Fn => {
-            const index = typed_value.val.cast(Value.Payload.Function).?.index;
-            const module_fn = module.fns[index];
+            const module_fn = typed_value.val.cast(Value.Payload.Function).?.func;
 
             var function = Function{
                 .module = &module,
-                .mod_fn = &module_fn,
+                .mod_fn = module_fn,
                 .code = code,
                 .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator),
-                .errors = std.ArrayList(ErrorMsg).init(code.allocator),
+                .errors = errors,
             };
             defer function.inst_table.deinit();
-            defer function.errors.deinit();
 
             for (module_fn.body.instructions) |inst| {
                 const new_inst = function.genFuncInst(inst) catch |err| switch (err) {
@@ -52,7 +38,6 @@
-            return Symbol{ .errors = function.errors.toOwnedSlice() };
         },
-        else => @panic("TODO implement generateSymbol for non-function types"),
+        else => @panic("TODO implement generateSymbol for non-function decls"),
     }
 }
 
@@ -61,7 +47,7 @@ const Function = struct {
     mod_fn: *const ir.Module.Fn,
     code: *std.ArrayList(u8),
     inst_table: std.AutoHashMap(*ir.Inst, MCValue),
-    errors: std.ArrayList(ErrorMsg),
+    errors: *std.ArrayList(ir.ErrorMsg),
 
     const MCValue = union(enum) {
         none,
@@ -78,6 +64,7 @@ const Function = struct {
     fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue {
         switch (inst.tag) {
             .breakpoint => return self.genBreakpoint(inst.src),
+            .call => return self.genCall(inst.cast(ir.Inst.Call).?),
             .unreach => return MCValue{ .unreach = {} },
             .constant => unreachable, // excluded from function bodies
             .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?),
@@ -101,6 +88,13 @@ const Function = struct {
         return .unreach;
     }
 
+    fn genCall(self: *Function, inst: *ir.Inst.Call) !MCValue {
+        switch (self.module.target.cpu.arch) {
+            else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.module.target.cpu.arch}),
+        }
+        return .unreach;
+    }
+
     fn genRet(self: *Function, inst: *ir.Inst.Ret) !MCValue {
         switch (self.module.target.cpu.arch) {
             .i386, .x86_64 => {
@@ -140,6 +134,7 @@ const Function = struct {
     fn genRelativeFwdJump(self: *Function, src: usize, amount: u32) !void {
         switch (self.module.target.cpu.arch) {
             .i386, .x86_64 => {
+                // TODO x86 treats the operands as signed
                 if (amount <= std.math.maxInt(u8)) {
                     try self.code.resize(self.code.items.len + 2);
                     self.code.items[self.code.items.len - 2] = 0xeb;
@@ -433,14 +428,11 @@ const Function = struct {
     fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } {
         @setCold(true);
-        const msg = try std.fmt.allocPrint(self.errors.allocator, format, args);
-        {
-            errdefer self.errors.allocator.free(msg);
-            (try self.errors.addOne()).* = .{
-                .byte_offset = src,
-                .msg = msg,
-            };
-        }
+        try self.errors.ensureCapacity(self.errors.items.len + 1);
+        self.errors.appendAssumeCapacity(.{
+            .byte_offset = src,
+            .msg = try std.fmt.allocPrint(self.errors.allocator, format, args),
+        });
         return error.CodegenFail;
     }
 };
diff --git a/src-self-hosted/compilation.zig b/src-self-hosted/compilation.zig
index cee4a48f93..75be005c83 100644
--- a/src-self-hosted/compilation.zig
+++ b/src-self-hosted/compilation.zig
@@ -19,7 +19,6 @@ const AtomicOrder = builtin.AtomicOrder;
 const Scope = @import("scope.zig").Scope;
 const Decl = @import("decl.zig").Decl;
 const ir = @import("ir.zig");
-const Visib = @import("visib.zig").Visib;
 const Value = @import("value.zig").Value;
 const Type = Value.Type;
 const Span = errmsg.Span;
@@ -30,7 +29,11 @@ const link = @import("link.zig").link;
 const LibCInstallation = @import("libc_installation.zig").LibCInstallation;
 const CInt = @import("c_int.zig").CInt;
 const fs = std.fs;
-const util = @import("util.zig");
+
+pub const Visib = enum {
+    Private,
+    Pub,
+};
 
 const max_src_size = 2 * 1024 * 1024 * 1024; // 2 GiB
 
@@ -45,7 +48,7 @@ pub const ZigCompiler = struct {
 
     native_libc: event.Future(LibCInstallation),
 
-    var lazy_init_targets = std.once(util.initializeAllTargets);
+    var lazy_init_targets = std.once(initializeAllTargets);
 
     pub fn init(allocator: *Allocator) !ZigCompiler {
         lazy_init_targets.call();
@@ -119,6 +122,8 @@ pub const LlvmHandle = struct {
 };
 
 pub const Compilation = struct {
+    pub const FnLinkSet = std.TailQueue(?*Value.Fn);
+
     zig_compiler: *ZigCompiler,
     name:
ArrayListSentineled(u8, 0), llvm_triple: ArrayListSentineled(u8, 0), @@ -152,8 +157,6 @@ pub const Compilation = struct { /// it uses an optional pointer so that tombstone removals are possible fn_link_set: event.Locked(FnLinkSet) = event.Locked(FnLinkSet).init(FnLinkSet.init()), - pub const FnLinkSet = std.TailQueue(?*Value.Fn); - link_libs_list: ArrayList(*LinkLib), libc_link_lib: ?*LinkLib = null, @@ -361,8 +364,7 @@ pub const Compilation = struct { return comp; } else if (await frame) |_| unreachable else |err| return err; } - - async fn createAsync( + fn createAsync( out_comp: *?*Compilation, zig_compiler: *ZigCompiler, name: []const u8, @@ -372,7 +374,7 @@ pub const Compilation = struct { build_mode: builtin.Mode, is_static: bool, zig_lib_dir: []const u8, - ) !void { + ) callconv(.Async) !void { const allocator = zig_compiler.allocator; // TODO merge this line with stage2.zig crossTargetToTarget @@ -442,8 +444,8 @@ pub const Compilation = struct { } comp.name = try ArrayListSentineled(u8, 0).init(comp.arena(), name); - comp.llvm_triple = try util.getLLVMTriple(comp.arena(), target); - comp.llvm_target = try util.llvmTargetFromTriple(comp.llvm_triple); + comp.llvm_triple = try getLLVMTriple(comp.arena(), target); + comp.llvm_target = try llvmTargetFromTriple(comp.llvm_triple); comp.zig_std_dir = try fs.path.join(comp.arena(), &[_][]const u8{ zig_lib_dir, "std" }); const opt_level = switch (build_mode) { @@ -726,8 +728,7 @@ pub const Compilation = struct { fn start(self: *Compilation) void { self.main_loop_future.resolve(); } - - async fn mainLoop(self: *Compilation) void { + fn mainLoop(self: *Compilation) callconv(.Async) void { // wait until start() is called _ = self.main_loop_future.get(); @@ -790,8 +791,7 @@ pub const Compilation = struct { build_result = group.wait(); } } - - async fn rebuildFile(self: *Compilation, root_scope: *Scope.Root) BuildError!void { + fn rebuildFile(self: *Compilation, root_scope: *Scope.Root) callconv(.Async) BuildError!void { const tree_scope = blk: { const source_code = fs.cwd().readFileAlloc( self.gpa(), @@ -964,15 +964,14 @@ pub const Compilation = struct { try link(self); } } - /// caller takes ownership of resulting Code - async fn genAndAnalyzeCode( + fn genAndAnalyzeCode( comp: *Compilation, tree_scope: *Scope.AstTree, scope: *Scope, node: *ast.Node, expected_type: ?*Type, - ) !*ir.Code { + ) callconv(.Async) !*ir.Code { const unanalyzed_code = try ir.gen( comp, node, @@ -1000,13 +999,12 @@ pub const Compilation = struct { return analyzed_code; } - - async fn addCompTimeBlock( + fn addCompTimeBlock( comp: *Compilation, tree_scope: *Scope.AstTree, scope: *Scope, comptime_node: *ast.Node.Comptime, - ) BuildError!void { + ) callconv(.Async) BuildError!void { const void_type = Type.Void.get(comp); defer void_type.base.base.deref(comp); @@ -1024,12 +1022,11 @@ pub const Compilation = struct { }; analyzed_code.destroy(comp.gpa()); } - - async fn addTopLevelDecl( + fn addTopLevelDecl( self: *Compilation, decl: *Decl, locked_table: *Decl.Table, - ) BuildError!void { + ) callconv(.Async) BuildError!void { const is_export = decl.isExported(decl.tree_scope.tree); if (is_export) { @@ -1065,11 +1062,10 @@ pub const Compilation = struct { try self.prelink_group.call(addCompileErrorAsync, .{ self, msg }); } - - async fn addCompileErrorAsync( + fn addCompileErrorAsync( self: *Compilation, msg: *Msg, - ) BuildError!void { + ) callconv(.Async) BuildError!void { errdefer msg.destroy(); const compile_errors = self.compile_errors.acquire(); @@ -1077,8 +1073,7 @@ 
pub const Compilation = struct { try compile_errors.value.append(msg); } - - async fn verifyUniqueSymbol(self: *Compilation, decl: *Decl) BuildError!void { + fn verifyUniqueSymbol(self: *Compilation, decl: *Decl) callconv(.Async) BuildError!void { const exported_symbol_names = self.exported_symbol_names.acquire(); defer exported_symbol_names.release(); @@ -1129,8 +1124,7 @@ pub const Compilation = struct { } return link_lib; } - - async fn startFindingNativeLibC(self: *Compilation) void { + fn startFindingNativeLibC(self: *Compilation) callconv(.Async) void { event.Loop.startCpuBoundOperation(); // we don't care if it fails, we're just trying to kick off the future resolution _ = self.zig_compiler.getNativeLibC() catch return; @@ -1234,7 +1228,7 @@ pub const Compilation = struct { } /// This declaration has been blessed as going into the final code generation. - pub async fn resolveDecl(comp: *Compilation, decl: *Decl) BuildError!void { + pub fn resolveDecl(comp: *Compilation, decl: *Decl) callconv(.Async) BuildError!void { if (decl.resolution.start()) |ptr| return ptr.*; decl.resolution.data = try generateDecl(comp, decl); @@ -1335,8 +1329,7 @@ fn generateDeclFn(comp: *Compilation, fn_decl: *Decl.Fn) !void { try comp.prelink_group.call(codegen.renderToLlvm, .{ comp, fn_val, analyzed_code }); try comp.prelink_group.call(addFnToLinkSet, .{ comp, fn_val }); } - -async fn addFnToLinkSet(comp: *Compilation, fn_val: *Value.Fn) Compilation.BuildError!void { +fn addFnToLinkSet(comp: *Compilation, fn_val: *Value.Fn) callconv(.Async) Compilation.BuildError!void { fn_val.base.ref(); defer fn_val.base.deref(comp); @@ -1432,3 +1425,33 @@ fn generateDeclFnProto(comp: *Compilation, fn_decl: *Decl.Fn) !void { fn_decl.value = .{ .FnProto = fn_proto_val }; symbol_name_consumed = true; } + +pub fn llvmTargetFromTriple(triple: [:0]const u8) !*llvm.Target { + var result: *llvm.Target = undefined; + var err_msg: [*:0]u8 = undefined; + if (llvm.GetTargetFromTriple(triple, &result, &err_msg) != 0) { + std.debug.warn("triple: {s} error: {s}\n", .{ triple, err_msg }); + return error.UnsupportedTarget; + } + return result; +} + +pub fn initializeAllTargets() void { + llvm.InitializeAllTargets(); + llvm.InitializeAllTargetInfos(); + llvm.InitializeAllTargetMCs(); + llvm.InitializeAllAsmPrinters(); + llvm.InitializeAllAsmParsers(); +} + +pub fn getLLVMTriple(allocator: *std.mem.Allocator, target: std.Target) ![:0]u8 { + var result = try std.ArrayListSentineled(u8, 0).initSize(allocator, 0); + defer result.deinit(); + + try result.outStream().print( + "{}-unknown-{}-{}", + .{ @tagName(target.cpu.arch), @tagName(target.os.tag), @tagName(target.abi) }, + ); + + return result.toOwnedSlice(); +} diff --git a/src-self-hosted/decl.zig b/src-self-hosted/decl.zig deleted file mode 100644 index e68a1458d6..0000000000 --- a/src-self-hosted/decl.zig +++ /dev/null @@ -1,102 +0,0 @@ -const std = @import("std"); -const Allocator = mem.Allocator; -const mem = std.mem; -const ast = std.zig.ast; -const Visib = @import("visib.zig").Visib; -const event = std.event; -const Value = @import("value.zig").Value; -const Token = std.zig.Token; -const errmsg = @import("errmsg.zig"); -const Scope = @import("scope.zig").Scope; -const Compilation = @import("compilation.zig").Compilation; - -pub const Decl = struct { - id: Id, - name: []const u8, - visib: Visib, - resolution: event.Future(Compilation.BuildError!void), - parent_scope: *Scope, - - // TODO when we destroy the decl, deref the tree scope - tree_scope: *Scope.AstTree, - - pub const Table 
= std.StringHashMap(*Decl);
-
-    pub fn cast(base: *Decl, comptime T: type) ?*T {
-        if (base.id != @field(Id, @typeName(T))) return null;
-        return @fieldParentPtr(T, "base", base);
-    }
-
-    pub fn isExported(base: *const Decl, tree: *ast.Tree) bool {
-        switch (base.id) {
-            .Fn => {
-                const fn_decl = @fieldParentPtr(Fn, "base", base);
-                return fn_decl.isExported(tree);
-            },
-            else => return false,
-        }
-    }
-
-    pub fn getSpan(base: *const Decl) errmsg.Span {
-        switch (base.id) {
-            .Fn => {
-                const fn_decl = @fieldParentPtr(Fn, "base", base);
-                const fn_proto = fn_decl.fn_proto;
-                const start = fn_proto.fn_token;
-                const end = fn_proto.name_token orelse start;
-                return errmsg.Span{
-                    .first = start,
-                    .last = end + 1,
-                };
-            },
-            else => @panic("TODO"),
-        }
-    }
-
-    pub fn findRootScope(base: *const Decl) *Scope.Root {
-        return base.parent_scope.findRoot();
-    }
-
-    pub const Id = enum {
-        Var,
-        Fn,
-        CompTime,
-    };
-
-    pub const Var = struct {
-        base: Decl,
-    };
-
-    pub const Fn = struct {
-        base: Decl,
-        value: union(enum) {
-            Unresolved,
-            Fn: *Value.Fn,
-            FnProto: *Value.FnProto,
-        },
-        fn_proto: *ast.Node.FnProto,
-
-        pub fn externLibName(self: Fn, tree: *ast.Tree) ?[]const u8 {
-            return if (self.fn_proto.extern_export_inline_token) |tok_index| x: {
-                const token = tree.tokens.at(tok_index);
-                break :x switch (token.id) {
-                    .Extern => tree.tokenSlicePtr(token),
-                    else => null,
-                };
-            } else null;
-        }
-
-        pub fn isExported(self: Fn, tree: *ast.Tree) bool {
-            if (self.fn_proto.extern_export_inline_token) |tok_index| {
-                const token = tree.tokens.at(tok_index);
-                return token.id == .Keyword_export;
-            } else {
-                return false;
-            }
-        }
-    };
-
-    pub const CompTime = struct {
-        base: Decl,
-    };
-};
diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig
index 6e58236ca8..e32e8cdaea 100644
--- a/src-self-hosted/ir.zig
+++ b/src-self-hosted/ir.zig
@@ -1,12 +1,16 @@
 const std = @import("std");
 const mem = std.mem;
 const Allocator = std.mem.Allocator;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+const LinkedList = std.TailQueue;
 const Value = @import("value.zig").Value;
 const Type = @import("type.zig").Type;
 const assert = std.debug.assert;
 const BigIntConst = std.math.big.int.Const;
 const BigIntMutable = std.math.big.int.Mutable;
 const Target = std.Target;
+const Package = @import("Package.zig");
+const link = @import("link.zig");
 
 pub const text = @import("ir/text.zig");
 
@@ -25,6 +29,7 @@ pub const Inst = struct {
         assembly,
         bitcast,
         breakpoint,
+        call,
         cmp,
         condbr,
         constant,
@@ -84,6 +89,15 @@ pub const Inst = struct {
         args: void,
     };
 
+    pub const Call = struct {
+        pub const base_tag = Tag.call;
+        base: Inst,
+        args: struct {
+            func: *Inst,
+            args: []const *Inst,
+        },
+    };
+
     pub const Cmp = struct {
         pub const base_tag = Tag.cmp;
 
@@ -158,170 +172,416 @@ pub const TypedValue = struct {
     val: Value,
 };
 
-pub const Module = struct {
-    exports: []Export,
-    errors: []ErrorMsg,
-    arena: std.heap.ArenaAllocator,
-    fns: []Fn,
-    target: Target,
-    link_mode: std.builtin.LinkMode,
-    output_mode: std.builtin.OutputMode,
-    object_format: std.Target.ObjectFormat,
-    optimize_mode: std.builtin.Mode,
+fn swapRemoveElem(allocator: *Allocator, comptime T: type, item: T, list: *ArrayListUnmanaged(T)) void {
+    var i: usize = 0;
+    while (i < list.items.len) {
+        if (list.items[i] == item) {
+            _ = list.swapRemove(i);
+            continue;
+        }
+        i += 1;
+    }
+}
 
-    pub const Export = struct {
-        name: []const u8,
-        typed_value: TypedValue,
+pub const Module = struct {
+    /// General-purpose allocator.
+ allocator: *Allocator, + /// Module owns this resource. + root_pkg: *Package, + /// Module owns this resource. + root_scope: *Scope.ZIRModule, + /// Pointer to externally managed resource. + bin_file: *link.ElfFile, + failed_decls: ArrayListUnmanaged(*Decl) = .{}, + failed_fns: ArrayListUnmanaged(*Fn) = .{}, + failed_files: ArrayListUnmanaged(*Scope.ZIRModule) = .{}, + decl_table: std.AutoHashMap(Decl.Hash, *Decl), + optimize_mode: std.builtin.Mode, + link_error_flags: link.ElfFile.ErrorFlags = .{}, + + pub const Decl = struct { + /// Contains the memory for `typed_value` and this `Decl` itself. + /// If the Decl is a function, also contains that memory. + /// If the decl has any export nodes, also contains that memory. + /// TODO look into using a more memory efficient arena that will cost less bytes per decl. + /// This one has a minimum allocation of 4096 bytes. + arena: std.heap.ArenaAllocator.State, + /// This name is relative to the containing namespace of the decl. It uses a null-termination + /// to save bytes, since there can be a lot of decls in a compilation. The null byte is not allowed + /// in symbol names, because executable file formats use null-terminated strings for symbol names. + name: [*:0]const u8, + /// It's rare for a decl to be exported, and it's even rarer for a decl to be mapped to more + /// than one export, so we use a linked list to save memory. + export_node: ?*LinkedList(std.builtin.ExportOptions).Node = null, + /// Byte offset into the source file that contains this declaration. + /// This is the base offset that src offsets within this Decl are relative to. src: usize, + /// Represents the "shallow" analysis status. For example, for decls that are functions, + /// the function type is analyzed with this set to `in_progress`, however, the semantic + /// analysis of the function body is performed with this value set to `success`. Functions + /// have their own analysis status field. + analysis: union(enum) { + in_progress, + failure: ErrorMsg, + success: TypedValue, + }, + /// The direct container of the Decl. This field will need to get more fleshed out when + /// self-hosted supports proper struct types and Zig AST => ZIR. + scope: *Scope.ZIRModule, + + pub fn destroy(self: *Decl, allocator: *Allocator) void { + var arena = self.arena.promote(allocator); + arena.deinit(); + } + + pub const Hash = [16]u8; + + /// Must generate unique bytes with no collisions with other decls. + /// The point of hashing here is only to limit the number of bytes of + /// the unique identifier to a fixed size (16 bytes). + pub fn fullyQualifiedNameHash(self: Decl) Hash { + // Right now we only have ZIRModule as the source. So this is simply the + // relative name of the decl. + var out: Hash = undefined; + std.crypto.Blake3.hash(mem.spanZ(u8, self.name), &out); + return out; + } }; + /// Memory is managed by the arena of the owning Decl. pub const Fn = struct { - analysis_status: enum { in_progress, failure, success }, - body: Body, fn_type: Type, + analysis: union(enum) { + in_progress: *Analysis, + failure: ErrorMsg, + success: Body, + }, + /// The direct container of the Fn. This field will need to get more fleshed out when + /// self-hosted supports proper struct types and Zig AST => ZIR. + scope: *Scope.ZIRModule, + + /// This memory managed by the general purpose allocator. + pub const Analysis = struct { + inner_block: Scope.Block, + /// null value means a semantic analysis error happened. 
+            inst_table: std.AutoHashMap(*text.Inst, ?*Inst),
+        };
+    };
+
+    pub const Scope = struct {
+        tag: Tag,
+
+        pub fn cast(base: *Scope, comptime T: type) ?*T {
+            if (base.tag != T.base_tag)
+                return null;
+
+            return @fieldParentPtr(T, "base", base);
+        }
+
+        pub const Tag = enum {
+            zir_module,
+            block,
+            decl,
+        };
+
+        pub const ZIRModule = struct {
+            pub const base_tag: Tag = .zir_module;
+            base: Scope = Scope{ .tag = base_tag },
+            /// Relative to the owning package's root_src_dir.
+            /// Reference to external memory, not owned by ZIRModule.
+            sub_file_path: []const u8,
+            contents: union(enum) {
+                unloaded,
+                parse_failure: ParseFailure,
+                success: Contents,
+            },
+
+            pub const ParseFailure = struct {
+                source: [:0]const u8,
+                errors: []ErrorMsg,
+
+                pub fn deinit(self: *ParseFailure, allocator: *Allocator) void {
+                    allocator.free(self.errors);
+                    allocator.free(self.source);
+                }
+            };
+
+            pub const Contents = struct {
+                source: [:0]const u8,
+                module: *text.Module,
+            };
+
+            pub fn deinit(self: *ZIRModule, allocator: *Allocator) void {
+                switch (self.contents) {
+                    .unloaded => {},
+                    .parse_failure => |*pf| pf.deinit(allocator),
+                    .success => |contents| {
+                        allocator.free(contents.source);
+                        contents.module.deinit(allocator);
+                    },
+                }
+                self.* = undefined;
+            }
+
+            pub fn loadContents(self: *ZIRModule, allocator: *Allocator) !*Contents {
+                switch (self.contents) {
+                    .success => |*contents| return contents,
+                    else => {},
+                }
+
+                const max_size = std.math.maxInt(u32);
+                const source = try self.root_pkg_dir.readFileAllocOptions(allocator, self.root_src_path, max_size, 1, 0);
+                errdefer allocator.free(source);
+
+                var errors = std.ArrayList(ErrorMsg).init(allocator);
+                defer errors.deinit();
+
+                var src_zir_module = try text.parse(allocator, source, &errors);
+                errdefer src_zir_module.deinit(allocator);
+
+                switch (self.contents) {
+                    .parse_failure => |*pf| pf.deinit(allocator),
+                    .unloaded => {},
+                    .success => unreachable,
+                }
+
+                if (errors.items.len != 0) {
+                    self.contents = .{ .parse_failure = .{
+                        .source = source,
+                        .errors = errors.toOwnedSlice(),
+                    } };
+                    return error.ParseFailure;
+                }
+                self.contents = .{
+                    .success = .{
+                        .source = source,
+                        .module = src_zir_module,
+                    },
+                };
+                return &self.contents.success;
+            }
+        };
+
+        /// This is a temporary structure, references to it are valid only
+        /// during semantic analysis of the block.
+        pub const Block = struct {
+            pub const base_tag: Tag = .block;
+            base: Scope = Scope{ .tag = base_tag },
+            func: *Fn,
+            instructions: ArrayListUnmanaged(*Inst),
+        };
+
+        /// This is a temporary structure, references to it are valid only
+        /// during semantic analysis of the decl.
+        pub const DeclAnalysis = struct {
+            pub const base_tag: Tag = .decl;
+            base: Scope = Scope{ .tag = base_tag },
+            decl: *Decl,
+        };
+    };
 
     pub const Body = struct {
         instructions: []*Inst,
     };
 
-    pub fn deinit(self: *Module, allocator: *Allocator) void {
-        allocator.free(self.exports);
-        allocator.free(self.errors);
-        for (self.fns) |f| {
-            allocator.free(f.body.instructions);
+    pub const AllErrors = struct {
+        arena: std.heap.ArenaAllocator.State,
+        list: []const Message,
+
+        pub const Message = struct {
+            src_path: []const u8,
+            line: usize,
+            column: usize,
+            byte_offset: usize,
+            msg: []const u8,
+        };
+
+        pub fn deinit(self: *AllErrors, allocator: *Allocator) void {
+            self.arena.promote(allocator).deinit();
         }
-        allocator.free(self.fns);
-        self.arena.deinit();
+
+        fn add(
+            arena: *std.heap.ArenaAllocator,
+            errors: *std.ArrayList(Message),
+            sub_file_path: []const u8,
+            source: []const u8,
+            simple_err_msg: ErrorMsg,
+        ) !void {
+            const loc = std.zig.findLineColumn(source, simple_err_msg.byte_offset);
+            try errors.append(.{
+                .src_path = try mem.dupe(&arena.allocator, u8, sub_file_path),
+                .msg = try mem.dupe(&arena.allocator, u8, simple_err_msg.msg),
+                .byte_offset = simple_err_msg.byte_offset,
+                .line = loc.line,
+                .column = loc.column,
+            });
+        }
+    };
+
+    pub fn deinit(self: *Module) void {
+        const allocator = self.allocator;
+        {
+            var it = self.decl_table.iterator();
+            while (it.next()) |kv| {
+                kv.value.destroy(allocator);
+            }
+            self.decl_table.deinit();
+        }
+        self.root_pkg.destroy();
+        self.root_scope.deinit(allocator);
         self.* = undefined;
     }
 
-pub const ErrorMsg = struct {
-    byte_offset: usize,
-    msg: []const u8,
-};
+    pub fn target(self: Module) std.Target {
+        return self.bin_file.options.target;
+    }
 
-pub const AnalyzeOptions = struct {
-    target: Target,
-    output_mode: std.builtin.OutputMode,
-    link_mode: std.builtin.LinkMode,
-    object_format: ?std.Target.ObjectFormat = null,
-    optimize_mode: std.builtin.Mode,
-};
+    /// Detect changes to source files, perform semantic analysis, and update the output files.
+    pub fn update(self: *Module) !void {
+        // TODO Use the cache hash file system to detect which source files changed.
+        // Here we simulate a full cache miss.
+        // Analyze the root source file now.
+        self.analyzeRoot(self.root_scope) catch |err| switch (err) {
+            error.AnalysisFail => {
+                assert(self.totalErrorCount() != 0);
+            },
+            else => |e| return e,
+        };
-pub fn analyze(allocator: *Allocator, old_module: text.Module, options: AnalyzeOptions) !Module {
-    var ctx = Analyze{
-        .allocator = allocator,
-        .arena = std.heap.ArenaAllocator.init(allocator),
-        .old_module = &old_module,
-        .errors = std.ArrayList(ErrorMsg).init(allocator),
-        .decl_table = std.AutoHashMap(*text.Inst, Analyze.NewDecl).init(allocator),
-        .exports = std.ArrayList(Module.Export).init(allocator),
-        .fns = std.ArrayList(Module.Fn).init(allocator),
-        .target = options.target,
-        .optimize_mode = options.optimize_mode,
-        .link_mode = options.link_mode,
-        .output_mode = options.output_mode,
-    };
-    defer ctx.errors.deinit();
-    defer ctx.decl_table.deinit();
-    defer ctx.exports.deinit();
-    defer ctx.fns.deinit();
-    errdefer ctx.arena.deinit();
+        try self.bin_file.flush();
+        self.link_error_flags = self.bin_file.error_flags;
     }
 
-    ctx.analyzeRoot() catch |err| switch (err) {
-        error.AnalysisFail => {
-            assert(ctx.errors.items.len != 0);
-        },
-        else => |e| return e,
-    };
-    return Module{
-        .exports = ctx.exports.toOwnedSlice(),
-        .errors = ctx.errors.toOwnedSlice(),
-        .fns = ctx.fns.toOwnedSlice(),
-        .arena = ctx.arena,
-        .target = ctx.target,
-        .link_mode = ctx.link_mode,
-        .output_mode = ctx.output_mode,
-        .object_format = options.object_format orelse ctx.target.getObjectFormat(),
-        .optimize_mode = ctx.optimize_mode,
-    };
-}
+    pub fn totalErrorCount(self: *Module) usize {
+        return self.failed_decls.items.len +
+            self.failed_fns.items.len +
+            self.failed_files.items.len +
+            @boolToInt(self.link_error_flags.no_entry_point_found);
+    }
 
-const Analyze = struct {
-    allocator: *Allocator,
-    arena: std.heap.ArenaAllocator,
-    old_module: *const text.Module,
-    errors: std.ArrayList(ErrorMsg),
-    decl_table: std.AutoHashMap(*text.Inst, NewDecl),
-    exports: std.ArrayList(Module.Export),
-    fns: std.ArrayList(Module.Fn),
-    target: Target,
-    link_mode: std.builtin.LinkMode,
-    optimize_mode: std.builtin.Mode,
-    output_mode: std.builtin.OutputMode,
+    pub fn getAllErrorsAlloc(self: *Module) !AllErrors {
+        var arena = std.heap.ArenaAllocator.init(self.allocator);
+        errdefer arena.deinit();
 
-    const NewDecl = struct {
-        /// null means a semantic analysis error happened
-        ptr: ?*Inst,
-    };
+        var errors = std.ArrayList(AllErrors.Message).init(self.allocator);
+        defer errors.deinit();
 
-    const NewInst = struct {
-        /// null means a semantic analysis error happened
-        ptr: ?*Inst,
-    };
+        for (self.failed_files.items) |scope| {
+            const source = scope.contents.parse_failure.source;
+            for (scope.contents.parse_failure.errors) |parse_error| {
+                try AllErrors.add(&arena, &errors, scope.sub_file_path, source, parse_error);
+            }
+        }
 
-    const Fn = struct {
-        /// Index into Module fns array
-        fn_index: usize,
-        inner_block: Block,
-        inst_table: std.AutoHashMap(*text.Inst, NewInst),
-    };
+        for (self.failed_fns.items) |func| {
+            const source = func.scope.contents.success.source;
+            try AllErrors.add(&arena, &errors, func.scope.sub_file_path, source, func.analysis.failure);
+        }
 
-    const Block = struct {
-        func: *Fn,
-        instructions: std.ArrayList(*Inst),
-    };
+        for (self.failed_decls.items) |decl| {
+            const source = decl.scope.contents.success.source;
+            try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, decl.analysis.failure);
+        }
+
+        if (self.link_error_flags.no_entry_point_found) {
+            try errors.append(.{
+                .src_path = self.root_pkg.root_src_path,
+                .line = 0,
+                .column = 0,
+                .byte_offset = 0,
+                .msg = try std.fmt.allocPrint(&arena.allocator, "no entry point found", .{}),
+            });
+        }
+
+        assert(errors.items.len == self.totalErrorCount());
+
+        return AllErrors{
+            .arena = arena.state,
+            .list = try mem.dupe(&arena.allocator, AllErrors.Message, errors.items),
+        };
+    }
 
     const InnerError = error{ OutOfMemory, AnalysisFail };
 
-    fn analyzeRoot(self: *Analyze) !void {
-        for (self.old_module.decls) |decl| {
+    fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void {
+        // TODO use the cache to identify, from the modified source files, the decls which have
+        // changed based on the span of memory that represents the decl in the re-parsed source file.
+        // Use the cached dependency graph to recursively determine the set of decls which need
+        // regeneration.
+        // Here we simulate adding a source file which was previously not part of the compilation,
+        // which means scanning the decls looking for exports.
+        // TODO also identify decls that need to be deleted.
+        const contents = blk: {
+            // Clear parse errors.
+            swapRemoveElem(self.allocator, *Scope.ZIRModule, root_scope, &self.failed_files);
+            try self.failed_files.ensureCapacity(self.allocator, self.failed_files.items.len + 1);
+            break :blk root_scope.loadContents(self.allocator) catch |err| switch (err) {
+                error.ParseFailure => {
+                    self.failed_files.appendAssumeCapacity(root_scope);
+                    return error.AnalysisFail;
+                },
+                else => |e| return e,
+            };
+        };
+        for (contents.module.decls) |decl| {
             if (decl.cast(text.Inst.Export)) |export_inst| {
-                try analyzeExport(self, null, export_inst);
+                try analyzeExport(self, &root_scope.base, export_inst);
             }
         }
     }
 
-    fn resolveInst(self: *Analyze, opt_block: ?*Block, old_inst: *text.Inst) InnerError!*Inst {
-        if (opt_block) |block| {
+    fn resolveDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl {
+        const hash = old_inst.fullyQualifiedNameHash();
+        if (self.decl_table.get(hash)) |kv| {
+            return kv.value;
+        } else {
+            const new_decl = blk: {
+                var decl_arena = std.heap.ArenaAllocator.init(self.allocator);
+                errdefer decl_arena.deinit();
+                const new_decl = try decl_arena.allocator.create(Decl);
+                const name = try mem.dupeZ(&decl_arena.allocator, u8, old_inst.name);
+                new_decl.* = .{
+                    .arena = decl_arena.state,
+                    .name = name,
+                    .src = old_inst.src,
+                    .analysis = .in_progress,
+                    .scope = scope.findZIRModule(),
+                };
+                try self.decl_table.putNoClobber(hash, new_decl);
+                break :blk new_decl;
+            };
+
+            var decl_scope: Scope.DeclAnalysis = .{ .decl = new_decl };
+            const typed_value = self.analyzeInstConst(&decl_scope.base, old_inst) catch |err| switch (err) {
+                error.AnalysisFail => return error.AnalysisFail,
+                else => |e| return e,
+            };
+            new_decl.analysis = .{ .success = typed_value };
+            if (try self.bin_file.updateDecl(self.*, typed_value, new_decl.export_node, hash)) |err_msg| {
+                new_decl.analysis = .{ .failure = err_msg };
+            }
+            return new_decl;
+        }
+    }
+
+    fn resolveInst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Inst {
+        if (scope.cast(Scope.Block)) |block| {
             if (block.func.inst_table.get(old_inst)) |kv| {
                 return kv.value.ptr orelse return error.AnalysisFail;
             }
         }
 
-        if (self.decl_table.get(old_inst)) |kv| {
-            return kv.value.ptr orelse return error.AnalysisFail;
-        } else {
-            const new_inst = self.analyzeInst(null, old_inst) catch |err| switch (err) {
-                error.AnalysisFail => {
-                    try self.decl_table.putNoClobber(old_inst, .{ .ptr = null });
-                    return
error.AnalysisFail;
-                },
-                else => |e| return e,
-            };
-            try self.decl_table.putNoClobber(old_inst, .{ .ptr = new_inst });
-            return new_inst;
-        }
+        const decl = try self.resolveDecl(scope, old_inst);
+        const decl_ref = try self.analyzeDeclRef(scope, old_inst.src, decl);
+        return self.analyzeDeref(scope, old_inst.src, decl_ref);
     }
 
-    fn requireRuntimeBlock(self: *Analyze, block: ?*Block, src: usize) !*Block {
-        return block orelse return self.fail(src, "instruction illegal outside function body", .{});
+    fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block {
+        return scope.cast(Scope.Block) orelse
+            return self.fail(scope, src, "instruction illegal outside function body", .{});
     }
 
-    fn resolveInstConst(self: *Analyze, block: ?*Block, old_inst: *text.Inst) InnerError!TypedValue {
-        const new_inst = try self.resolveInst(block, old_inst);
-        const val = try self.resolveConstValue(new_inst);
+    fn resolveInstConst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!TypedValue {
+        const new_inst = try self.resolveInst(scope, old_inst);
+        const val = try self.resolveConstValue(scope, new_inst);
         return TypedValue{
             .ty = new_inst.ty,
@@ -329,60 +589,67 @@ const Analyze = struct {
         };
     }
 
-    fn resolveConstValue(self: *Analyze, base: *Inst) !Value {
-        return (try self.resolveDefinedValue(base)) orelse
-            return self.fail(base.src, "unable to resolve comptime value", .{});
+    fn resolveConstValue(self: *Module, scope: *Scope, base: *Inst) !Value {
+        return (try self.resolveDefinedValue(scope, base)) orelse
+            return self.fail(scope, base.src, "unable to resolve comptime value", .{});
     }
 
-    fn resolveDefinedValue(self: *Analyze, base: *Inst) !?Value {
+    fn resolveDefinedValue(self: *Module, scope: *Scope, base: *Inst) !?Value {
         if (base.value()) |val| {
             if (val.isUndef()) {
-                return self.fail(base.src, "use of undefined value here causes undefined behavior", .{});
+                return self.fail(scope, base.src, "use of undefined value here causes undefined behavior", .{});
             }
             return val;
         }
         return null;
     }
 
-    fn resolveConstString(self: *Analyze, block: ?*Block, old_inst: *text.Inst) ![]u8 {
-        const new_inst = try self.resolveInst(block, old_inst);
+    fn resolveConstString(self: *Module, scope: *Scope, old_inst: *text.Inst) ![]u8 {
+        const new_inst = try self.resolveInst(scope, old_inst);
         const wanted_type = Type.initTag(.const_slice_u8);
-        const coerced_inst = try self.coerce(block, wanted_type, new_inst);
-        const val = try self.resolveConstValue(coerced_inst);
+        const coerced_inst = try self.coerce(scope, wanted_type, new_inst);
+        const val = try self.resolveConstValue(scope, coerced_inst);
         return val.toAllocatedBytes(&self.arena.allocator);
     }
 
-    fn resolveType(self: *Analyze, block: ?*Block, old_inst: *text.Inst) !Type {
-        const new_inst = try self.resolveInst(block, old_inst);
+    fn resolveType(self: *Module, scope: *Scope, old_inst: *text.Inst) !Type {
+        const new_inst = try self.resolveInst(scope, old_inst);
         const wanted_type = Type.initTag(.@"type");
-        const coerced_inst = try self.coerce(block, wanted_type, new_inst);
-        const val = try self.resolveConstValue(coerced_inst);
+        const coerced_inst = try self.coerce(scope, wanted_type, new_inst);
+        const val = try self.resolveConstValue(scope, coerced_inst);
         return val.toType();
     }
 
-    fn analyzeExport(self: *Analyze, block: ?*Block, export_inst: *text.Inst.Export) !void {
-        const symbol_name = try self.resolveConstString(block, export_inst.positionals.symbol_name);
-        const typed_value = try self.resolveInstConst(block, export_inst.positionals.value);
+    fn analyzeExport(self: *Module, scope: *Scope, export_inst: *text.Inst.Export) !void {
+        const symbol_name = try self.resolveConstString(scope, export_inst.positionals.symbol_name);
+        const decl = try self.resolveDecl(scope,
export_inst.positionals.value);
 
-        switch (typed_value.ty.zigTypeTag()) {
-            .Fn => {},
-            else => return self.fail(
-                export_inst.positionals.value.src,
-                "unable to export type '{}'",
-                .{typed_value.ty},
-            ),
+        switch (decl.analysis) {
+            .in_progress => unreachable,
+            .failure => return error.AnalysisFail,
+            .success => |typed_value| switch (typed_value.ty.zigTypeTag()) {
+                .Fn => {},
+                else => return self.fail(
+                    scope,
+                    export_inst.positionals.value.src,
+                    "unable to export type '{}'",
+                    .{typed_value.ty},
+                ),
+            },
         }
 
-        try self.exports.append(.{
-            .name = symbol_name,
-            .typed_value = typed_value,
-            .src = export_inst.base.src,
-        });
+        const Node = LinkedList(std.builtin.ExportOptions).Node;
+        const export_node = try decl.arena.promote(self.allocator).allocator.create(Node);
+        export_node.* = .{ .data = .{ .name = symbol_name } };
+        decl.export_node = export_node;
+
+        // TODO Avoid double update in the case of exporting a decl that we just created.
+        self.bin_file.updateDeclExports();
     }
 
     /// TODO should not need the cast on the last parameter at the callsites
     fn addNewInstArgs(
-        self: *Analyze,
-        block: *Block,
+        self: *Module,
+        block: *Scope.Block,
         src: usize,
         ty: Type,
         comptime T: type,
@@ -393,7 +660,7 @@ const Analyze = struct {
         return &inst.base;
     }
 
-    fn addNewInst(self: *Analyze, block: *Block, src: usize, ty: Type, comptime T: type) !*T {
+    fn addNewInst(self: *Module, block: *Scope.Block, src: usize, ty: Type, comptime T: type) !*T {
         const inst = try self.arena.allocator.create(T);
         inst.* = .{
             .base = .{
@@ -403,11 +670,11 @@ const Analyze = struct {
             },
             .args = undefined,
         };
-        try block.instructions.append(&inst.base);
+        try block.instructions.append(self.allocator, &inst.base);
         return inst;
     }
 
-    fn constInst(self: *Analyze, src: usize, typed_value: TypedValue) !*Inst {
+    fn constInst(self: *Module, src: usize, typed_value: TypedValue) !*Inst {
         const const_inst = try self.arena.allocator.create(Inst.Constant);
         const_inst.* = .{
             .base = .{
@@ -420,7 +687,7 @@ const Analyze = struct {
         return &const_inst.base;
     }
 
-    fn constStr(self: *Analyze, src: usize, str: []const u8) !*Inst {
+    fn constStr(self: *Module, src: usize, str: []const u8) !*Inst {
         const array_payload = try self.arena.allocator.create(Type.Payload.Array_u8_Sentinel0);
         array_payload.* = .{ .len = str.len };
 
@@ -436,35 +703,35 @@ const Analyze = struct {
         });
     }
 
-    fn constType(self: *Analyze, src: usize, ty: Type) !*Inst {
+    fn constType(self: *Module, src: usize, ty: Type) !*Inst {
         return self.constInst(src, .{
             .ty = Type.initTag(.type),
             .val = try ty.toValue(&self.arena.allocator),
         });
     }
 
-    fn constVoid(self: *Analyze, src: usize) !*Inst {
+    fn constVoid(self: *Module, src: usize) !*Inst {
         return self.constInst(src, .{
             .ty = Type.initTag(.void),
             .val = Value.initTag(.the_one_possible_value),
         });
     }
 
-    fn constUndef(self: *Analyze, src: usize, ty: Type) !*Inst {
+    fn constUndef(self: *Module, src: usize, ty: Type) !*Inst {
         return self.constInst(src, .{
             .ty = ty,
             .val = Value.initTag(.undef),
         });
     }
 
-    fn constBool(self: *Analyze, src: usize, v: bool) !*Inst {
+    fn constBool(self: *Module, src: usize, v: bool) !*Inst {
         return self.constInst(src, .{
             .ty = Type.initTag(.bool),
             .val = ([2]Value{ Value.initTag(.bool_false), Value.initTag(.bool_true) })[@boolToInt(v)],
         });
     }
 
-    fn constIntUnsigned(self: *Analyze, src: usize, ty: Type, int: u64) !*Inst {
+    fn constIntUnsigned(self: *Module, src: usize, ty: Type, int: u64) !*Inst {
         const int_payload = try self.arena.allocator.create(Value.Payload.Int_u64);
         int_payload.* = .{ .int = int };
 
@@ 
-474,7 +741,7 @@ const Analyze = struct { }); } - fn constIntSigned(self: *Analyze, src: usize, ty: Type, int: i64) !*Inst { + fn constIntSigned(self: *Module, src: usize, ty: Type, int: i64) !*Inst { const int_payload = try self.arena.allocator.create(Value.Payload.Int_i64); int_payload.* = .{ .int = int }; @@ -484,7 +751,7 @@ const Analyze = struct { }); } - fn constIntBig(self: *Analyze, src: usize, ty: Type, big_int: BigIntConst) !*Inst { + fn constIntBig(self: *Module, src: usize, ty: Type, big_int: BigIntConst) !*Inst { const val_payload = if (big_int.positive) blk: { if (big_int.to(u64)) |x| { return self.constIntUnsigned(src, ty, x); @@ -513,9 +780,18 @@ const Analyze = struct { }); } - fn analyzeInst(self: *Analyze, block: ?*Block, old_inst: *text.Inst) InnerError!*Inst { + fn analyzeInstConst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!TypedValue { + const new_inst = try self.analyzeInst(scope, old_inst); + return TypedValue{ + .ty = new_inst.ty, + .val = try self.resolveConstValue(scope, new_inst), + }; + } + + fn analyzeInst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Inst { switch (old_inst.tag) { - .breakpoint => return self.analyzeInstBreakpoint(block, old_inst.cast(text.Inst.Breakpoint).?), + .breakpoint => return self.analyzeInstBreakpoint(scope, old_inst.cast(text.Inst.Breakpoint).?), + .call => return self.analyzeInstCall(scope, old_inst.cast(text.Inst.Call).?), .str => { // We can use this reference because Inst.Const's Value is arena-allocated. // The value would get copied to a MemoryCell before the `text.Inst.Str` lifetime ends. @@ -526,53 +802,118 @@ const Analyze = struct { const big_int = old_inst.cast(text.Inst.Int).?.positionals.int; return self.constIntBig(old_inst.src, Type.initTag(.comptime_int), big_int); }, - .ptrtoint => return self.analyzeInstPtrToInt(block, old_inst.cast(text.Inst.PtrToInt).?), - .fieldptr => return self.analyzeInstFieldPtr(block, old_inst.cast(text.Inst.FieldPtr).?), - .deref => return self.analyzeInstDeref(block, old_inst.cast(text.Inst.Deref).?), - .as => return self.analyzeInstAs(block, old_inst.cast(text.Inst.As).?), - .@"asm" => return self.analyzeInstAsm(block, old_inst.cast(text.Inst.Asm).?), - .@"unreachable" => return self.analyzeInstUnreachable(block, old_inst.cast(text.Inst.Unreachable).?), - .@"return" => return self.analyzeInstRet(block, old_inst.cast(text.Inst.Return).?), - .@"fn" => return self.analyzeInstFn(block, old_inst.cast(text.Inst.Fn).?), + .ptrtoint => return self.analyzeInstPtrToInt(scope, old_inst.cast(text.Inst.PtrToInt).?), + .fieldptr => return self.analyzeInstFieldPtr(scope, old_inst.cast(text.Inst.FieldPtr).?), + .deref => return self.analyzeInstDeref(scope, old_inst.cast(text.Inst.Deref).?), + .as => return self.analyzeInstAs(scope, old_inst.cast(text.Inst.As).?), + .@"asm" => return self.analyzeInstAsm(scope, old_inst.cast(text.Inst.Asm).?), + .@"unreachable" => return self.analyzeInstUnreachable(scope, old_inst.cast(text.Inst.Unreachable).?), + .@"return" => return self.analyzeInstRet(scope, old_inst.cast(text.Inst.Return).?), + // TODO postpone function analysis until later + .@"fn" => return self.analyzeInstFn(scope, old_inst.cast(text.Inst.Fn).?), .@"export" => { - try self.analyzeExport(block, old_inst.cast(text.Inst.Export).?); + try self.analyzeExport(scope, old_inst.cast(text.Inst.Export).?); return self.constVoid(old_inst.src); }, .primitive => return self.analyzeInstPrimitive(old_inst.cast(text.Inst.Primitive).?), - .fntype => return 
self.analyzeInstFnType(block, old_inst.cast(text.Inst.FnType).?), - .intcast => return self.analyzeInstIntCast(block, old_inst.cast(text.Inst.IntCast).?), - .bitcast => return self.analyzeInstBitCast(block, old_inst.cast(text.Inst.BitCast).?), - .elemptr => return self.analyzeInstElemPtr(block, old_inst.cast(text.Inst.ElemPtr).?), - .add => return self.analyzeInstAdd(block, old_inst.cast(text.Inst.Add).?), - .cmp => return self.analyzeInstCmp(block, old_inst.cast(text.Inst.Cmp).?), - .condbr => return self.analyzeInstCondBr(block, old_inst.cast(text.Inst.CondBr).?), - .isnull => return self.analyzeInstIsNull(block, old_inst.cast(text.Inst.IsNull).?), - .isnonnull => return self.analyzeInstIsNonNull(block, old_inst.cast(text.Inst.IsNonNull).?), + .fntype => return self.analyzeInstFnType(scope, old_inst.cast(text.Inst.FnType).?), + .intcast => return self.analyzeInstIntCast(scope, old_inst.cast(text.Inst.IntCast).?), + .bitcast => return self.analyzeInstBitCast(scope, old_inst.cast(text.Inst.BitCast).?), + .elemptr => return self.analyzeInstElemPtr(scope, old_inst.cast(text.Inst.ElemPtr).?), + .add => return self.analyzeInstAdd(scope, old_inst.cast(text.Inst.Add).?), + .cmp => return self.analyzeInstCmp(scope, old_inst.cast(text.Inst.Cmp).?), + .condbr => return self.analyzeInstCondBr(scope, old_inst.cast(text.Inst.CondBr).?), + .isnull => return self.analyzeInstIsNull(scope, old_inst.cast(text.Inst.IsNull).?), + .isnonnull => return self.analyzeInstIsNonNull(scope, old_inst.cast(text.Inst.IsNonNull).?), } } - fn analyzeInstBreakpoint(self: *Analyze, block: ?*Block, inst: *text.Inst.Breakpoint) InnerError!*Inst { - const b = try self.requireRuntimeBlock(block, inst.base.src); + fn analyzeInstBreakpoint(self: *Module, scope: *Scope, inst: *text.Inst.Breakpoint) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, inst.base.src); return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, Inst.Args(Inst.Breakpoint){}); } - fn analyzeInstFn(self: *Analyze, block: ?*Block, fn_inst: *text.Inst.Fn) InnerError!*Inst { - const fn_type = try self.resolveType(block, fn_inst.positionals.fn_type); + fn analyzeInstCall(self: *Module, scope: *Scope, inst: *text.Inst.Call) InnerError!*Inst { + const func = try self.resolveInst(scope, inst.positionals.func); + if (func.ty.zigTypeTag() != .Fn) + return self.fail(scope, inst.positionals.func.src, "type '{}' not a function", .{func.ty}); + + const cc = func.ty.fnCallingConvention(); + if (cc == .Naked) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "unable to call function with naked calling convention", + .{}, + ); + } + const call_params_len = inst.positionals.args.len; + const fn_params_len = func.ty.fnParamLen(); + if (func.ty.fnIsVarArgs()) { + if (call_params_len < fn_params_len) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "expected at least {} arguments, found {}", + .{ fn_params_len, call_params_len }, + ); + } + return self.fail(scope, inst.base.src, "TODO implement support for calling var args functions", .{}); + } else if (fn_params_len != call_params_len) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "expected {} arguments, found {}", + .{ fn_params_len, call_params_len }, + ); + } + + if (inst.kw_args.modifier == .compile_time) { + return self.fail(scope, inst.base.src, "TODO implement comptime function calls", .{}); + } + if 
(inst.kw_args.modifier != .auto) { + return self.fail(scope, inst.base.src, "TODO implement call with modifier {}", .{inst.kw_args.modifier}); + } + + // TODO handle function calls of generic functions + + const fn_param_types = try self.allocator.alloc(Type, fn_params_len); + defer self.allocator.free(fn_param_types); + func.ty.fnParamTypes(fn_param_types); + + const casted_args = try self.arena.allocator.alloc(*Inst, fn_params_len); + for (inst.positionals.args) |src_arg, i| { + const uncasted_arg = try self.resolveInst(scope, src_arg); + casted_args[i] = try self.coerce(scope, fn_param_types[i], uncasted_arg); + } + + const b = try self.requireRuntimeBlock(scope, inst.base.src); + return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Call, Inst.Args(Inst.Call){ + .func = func, + .args = casted_args, + }); + } + + fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *text.Inst.Fn) InnerError!*Inst { + const fn_type = try self.resolveType(scope, fn_inst.positionals.fn_type); var new_func: Fn = .{ .fn_index = self.fns.items.len, .inner_block = .{ .func = undefined, - .instructions = std.ArrayList(*Inst).init(self.allocator), + .instructions = .{}, }, - .inst_table = std.AutoHashMap(*text.Inst, NewInst).init(self.allocator), + .inst_table = std.AutoHashMap(*text.Inst, ?*Inst).init(self.allocator), }; new_func.inner_block.func = &new_func; defer new_func.inner_block.instructions.deinit(); defer new_func.inst_table.deinit(); // Don't hang on to a reference to this when analyzing body instructions, since the memory // could become invalid. - (try self.fns.addOne()).* = .{ + (try self.fns.addOne(self.allocator)).* = .{ .analysis_status = .in_progress, .fn_type = fn_type, .body = undefined, @@ -593,8 +934,15 @@ const Analyze = struct { }); } - fn analyzeInstFnType(self: *Analyze, block: ?*Block, fntype: *text.Inst.FnType) InnerError!*Inst { - const return_type = try self.resolveType(block, fntype.positionals.return_type); + fn analyzeInstFnType(self: *Module, scope: *Scope, fntype: *text.Inst.FnType) InnerError!*Inst { + const return_type = try self.resolveType(scope, fntype.positionals.return_type); + + if (return_type.zigTypeTag() == .NoReturn and + fntype.positionals.param_types.len == 0 and + fntype.kw_args.cc == .Unspecified) + { + return self.constType(fntype.base.src, Type.initTag(.fn_noreturn_no_args)); + } if (return_type.zigTypeTag() == .NoReturn and fntype.positionals.param_types.len == 0 and @@ -610,37 +958,37 @@ const Analyze = struct { return self.constType(fntype.base.src, Type.initTag(.fn_ccc_void_no_args)); } - return self.fail(fntype.base.src, "TODO implement fntype instruction more", .{}); + return self.fail(scope, fntype.base.src, "TODO implement fntype instruction more", .{}); } - fn analyzeInstPrimitive(self: *Analyze, primitive: *text.Inst.Primitive) InnerError!*Inst { + fn analyzeInstPrimitive(self: *Module, primitive: *text.Inst.Primitive) InnerError!*Inst { return self.constType(primitive.base.src, primitive.positionals.tag.toType()); } - fn analyzeInstAs(self: *Analyze, block: ?*Block, as: *text.Inst.As) InnerError!*Inst { - const dest_type = try self.resolveType(block, as.positionals.dest_type); - const new_inst = try self.resolveInst(block, as.positionals.value); - return self.coerce(block, dest_type, new_inst); + fn analyzeInstAs(self: *Module, scope: *Scope, as: *text.Inst.As) InnerError!*Inst { + const dest_type = try self.resolveType(scope, as.positionals.dest_type); + const new_inst = try self.resolveInst(scope, as.positionals.value); + 
return self.coerce(scope, dest_type, new_inst); } - fn analyzeInstPtrToInt(self: *Analyze, block: ?*Block, ptrtoint: *text.Inst.PtrToInt) InnerError!*Inst { - const ptr = try self.resolveInst(block, ptrtoint.positionals.ptr); + fn analyzeInstPtrToInt(self: *Module, scope: *Scope, ptrtoint: *text.Inst.PtrToInt) InnerError!*Inst { + const ptr = try self.resolveInst(scope, ptrtoint.positionals.ptr); if (ptr.ty.zigTypeTag() != .Pointer) { - return self.fail(ptrtoint.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}); + return self.fail(scope, ptrtoint.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}); } // TODO handle known-pointer-address - const b = try self.requireRuntimeBlock(block, ptrtoint.base.src); + const b = try self.requireRuntimeBlock(scope, ptrtoint.base.src); const ty = Type.initTag(.usize); return self.addNewInstArgs(b, ptrtoint.base.src, ty, Inst.PtrToInt, Inst.Args(Inst.PtrToInt){ .ptr = ptr }); } - fn analyzeInstFieldPtr(self: *Analyze, block: ?*Block, fieldptr: *text.Inst.FieldPtr) InnerError!*Inst { - const object_ptr = try self.resolveInst(block, fieldptr.positionals.object_ptr); - const field_name = try self.resolveConstString(block, fieldptr.positionals.field_name); + fn analyzeInstFieldPtr(self: *Module, scope: *Scope, fieldptr: *text.Inst.FieldPtr) InnerError!*Inst { + const object_ptr = try self.resolveInst(scope, fieldptr.positionals.object_ptr); + const field_name = try self.resolveConstString(scope, fieldptr.positionals.field_name); const elem_ty = switch (object_ptr.ty.zigTypeTag()) { .Pointer => object_ptr.ty.elemType(), - else => return self.fail(fieldptr.positionals.object_ptr.src, "expected pointer, found '{}'", .{object_ptr.ty}), + else => return self.fail(scope, fieldptr.positionals.object_ptr.src, "expected pointer, found '{}'", .{object_ptr.ty}), }; switch (elem_ty.zigTypeTag()) { .Array => { @@ -657,24 +1005,26 @@ const Analyze = struct { }); } else { return self.fail( + scope, fieldptr.positionals.field_name.src, "no member named '{}' in '{}'", .{ field_name, elem_ty }, ); } }, - else => return self.fail(fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}), + else => return self.fail(scope, fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}), } } - fn analyzeInstIntCast(self: *Analyze, block: ?*Block, intcast: *text.Inst.IntCast) InnerError!*Inst { - const dest_type = try self.resolveType(block, intcast.positionals.dest_type); - const new_inst = try self.resolveInst(block, intcast.positionals.value); + fn analyzeInstIntCast(self: *Module, scope: *Scope, intcast: *text.Inst.IntCast) InnerError!*Inst { + const dest_type = try self.resolveType(scope, intcast.positionals.dest_type); + const new_inst = try self.resolveInst(scope, intcast.positionals.value); const dest_is_comptime_int = switch (dest_type.zigTypeTag()) { .ComptimeInt => true, .Int => false, else => return self.fail( + scope, intcast.positionals.dest_type.src, "expected integer type, found '{}'", .{ @@ -686,6 +1036,7 @@ const Analyze = struct { switch (new_inst.ty.zigTypeTag()) { .ComptimeInt, .Int => {}, else => return self.fail( + scope, intcast.positionals.value.src, "expected integer type, found '{}'", .{new_inst.ty}, @@ -693,22 +1044,22 @@ const Analyze = struct { } if (dest_is_comptime_int or new_inst.value() != null) { - return self.coerce(block, dest_type, new_inst); + return self.coerce(scope, dest_type, new_inst); } - return self.fail(intcast.base.src, "TODO implement analyze widen or shorten int", .{}); + return 
self.fail(scope, intcast.base.src, "TODO implement analyze widen or shorten int", .{}); } - fn analyzeInstBitCast(self: *Analyze, block: ?*Block, inst: *text.Inst.BitCast) InnerError!*Inst { - const dest_type = try self.resolveType(block, inst.positionals.dest_type); - const operand = try self.resolveInst(block, inst.positionals.operand); - return self.bitcast(block, dest_type, operand); + fn analyzeInstBitCast(self: *Module, scope: *Scope, inst: *text.Inst.BitCast) InnerError!*Inst { + const dest_type = try self.resolveType(scope, inst.positionals.dest_type); + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.bitcast(scope, dest_type, operand); } - fn analyzeInstElemPtr(self: *Analyze, block: ?*Block, inst: *text.Inst.ElemPtr) InnerError!*Inst { - const array_ptr = try self.resolveInst(block, inst.positionals.array_ptr); - const uncasted_index = try self.resolveInst(block, inst.positionals.index); - const elem_index = try self.coerce(block, Type.initTag(.usize), uncasted_index); + fn analyzeInstElemPtr(self: *Module, scope: *Scope, inst: *text.Inst.ElemPtr) InnerError!*Inst { + const array_ptr = try self.resolveInst(scope, inst.positionals.array_ptr); + const uncasted_index = try self.resolveInst(scope, inst.positionals.index); + const elem_index = try self.coerce(scope, Type.initTag(.usize), uncasted_index); if (array_ptr.ty.isSinglePointer() and array_ptr.ty.elemType().zigTypeTag() == .Array) { if (array_ptr.value()) |array_ptr_val| { @@ -717,28 +1068,25 @@ const Analyze = struct { const index_u64 = index_val.toUnsignedInt(); // @intCast here because it would have been impossible to construct a value that // required a larger index. - const elem_val = try array_ptr_val.elemValueAt(&self.arena.allocator, @intCast(usize, index_u64)); - - const ref_payload = try self.arena.allocator.create(Value.Payload.RefVal); - ref_payload.* = .{ .val = elem_val }; + const elem_ptr = try array_ptr_val.elemPtr(&self.arena.allocator, @intCast(usize, index_u64)); const type_payload = try self.arena.allocator.create(Type.Payload.SingleConstPointer); type_payload.* = .{ .pointee_type = array_ptr.ty.elemType().elemType() }; return self.constInst(inst.base.src, .{ .ty = Type.initPayload(&type_payload.base), - .val = Value.initPayload(&ref_payload.base), + .val = elem_ptr, }); } } } - return self.fail(inst.base.src, "TODO implement more analyze elemptr", .{}); + return self.fail(scope, inst.base.src, "TODO implement more analyze elemptr", .{}); } - fn analyzeInstAdd(self: *Analyze, block: ?*Block, inst: *text.Inst.Add) InnerError!*Inst { - const lhs = try self.resolveInst(block, inst.positionals.lhs); - const rhs = try self.resolveInst(block, inst.positionals.rhs); + fn analyzeInstAdd(self: *Module, scope: *Scope, inst: *text.Inst.Add) InnerError!*Inst { + const lhs = try self.resolveInst(scope, inst.positionals.lhs); + const rhs = try self.resolveInst(scope, inst.positionals.rhs); if (lhs.ty.zigTypeTag() == .Int and rhs.ty.zigTypeTag() == .Int) { if (lhs.value()) |lhs_val| { @@ -758,7 +1106,7 @@ const Analyze = struct { const result_limbs = result_bigint.limbs[0..result_bigint.len]; if (!lhs.ty.eql(rhs.ty)) { - return self.fail(inst.base.src, "TODO implement peer type resolution", .{}); + return self.fail(scope, inst.base.src, "TODO implement peer type resolution", .{}); } const val_payload = if (result_bigint.positive) blk: { @@ -779,14 +1127,14 @@ const Analyze = struct { } } - return self.fail(inst.base.src, "TODO implement more analyze add", .{}); + return 
self.fail(scope, inst.base.src, "TODO implement more analyze add", .{}); } - fn analyzeInstDeref(self: *Analyze, block: ?*Block, deref: *text.Inst.Deref) InnerError!*Inst { - const ptr = try self.resolveInst(block, deref.positionals.ptr); + fn analyzeInstDeref(self: *Module, scope: *Scope, deref: *text.Inst.Deref) InnerError!*Inst { + const ptr = try self.resolveInst(scope, deref.positionals.ptr); const elem_ty = switch (ptr.ty.zigTypeTag()) { .Pointer => ptr.ty.elemType(), - else => return self.fail(deref.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}), + else => return self.fail(scope, deref.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}), }; if (ptr.value()) |val| { return self.constInst(deref.base.src, .{ @@ -795,30 +1143,30 @@ const Analyze = struct { }); } - return self.fail(deref.base.src, "TODO implement runtime deref", .{}); + return self.fail(scope, deref.base.src, "TODO implement runtime deref", .{}); } - fn analyzeInstAsm(self: *Analyze, block: ?*Block, assembly: *text.Inst.Asm) InnerError!*Inst { - const return_type = try self.resolveType(block, assembly.positionals.return_type); - const asm_source = try self.resolveConstString(block, assembly.positionals.asm_source); - const output = if (assembly.kw_args.output) |o| try self.resolveConstString(block, o) else null; + fn analyzeInstAsm(self: *Module, scope: *Scope, assembly: *text.Inst.Asm) InnerError!*Inst { + const return_type = try self.resolveType(scope, assembly.positionals.return_type); + const asm_source = try self.resolveConstString(scope, assembly.positionals.asm_source); + const output = if (assembly.kw_args.output) |o| try self.resolveConstString(scope, o) else null; const inputs = try self.arena.allocator.alloc([]const u8, assembly.kw_args.inputs.len); const clobbers = try self.arena.allocator.alloc([]const u8, assembly.kw_args.clobbers.len); const args = try self.arena.allocator.alloc(*Inst, assembly.kw_args.args.len); for (inputs) |*elem, i| { - elem.* = try self.resolveConstString(block, assembly.kw_args.inputs[i]); + elem.* = try self.resolveConstString(scope, assembly.kw_args.inputs[i]); } for (clobbers) |*elem, i| { - elem.* = try self.resolveConstString(block, assembly.kw_args.clobbers[i]); + elem.* = try self.resolveConstString(scope, assembly.kw_args.clobbers[i]); } for (args) |*elem, i| { - const arg = try self.resolveInst(block, assembly.kw_args.args[i]); - elem.* = try self.coerce(block, Type.initTag(.usize), arg); + const arg = try self.resolveInst(scope, assembly.kw_args.args[i]); + elem.* = try self.coerce(scope, Type.initTag(.usize), arg); } - const b = try self.requireRuntimeBlock(block, assembly.base.src); + const b = try self.requireRuntimeBlock(scope, assembly.base.src); return self.addNewInstArgs(b, assembly.base.src, return_type, Inst.Assembly, Inst.Args(Inst.Assembly){ .asm_source = asm_source, .is_volatile = assembly.kw_args.@"volatile", @@ -829,9 +1177,9 @@ const Analyze = struct { }); } - fn analyzeInstCmp(self: *Analyze, block: ?*Block, inst: *text.Inst.Cmp) InnerError!*Inst { - const lhs = try self.resolveInst(block, inst.positionals.lhs); - const rhs = try self.resolveInst(block, inst.positionals.rhs); + fn analyzeInstCmp(self: *Module, scope: *Scope, inst: *text.Inst.Cmp) InnerError!*Inst { + const lhs = try self.resolveInst(scope, inst.positionals.lhs); + const rhs = try self.resolveInst(scope, inst.positionals.rhs); const op = inst.positionals.op; const is_equality_cmp = switch (op) { @@ -853,7 +1201,7 @@ const Analyze = struct { const is_null = 
opt_val.isNull(); return self.constBool(inst.base.src, if (op == .eq) is_null else !is_null); } - const b = try self.requireRuntimeBlock(block, inst.base.src); + const b = try self.requireRuntimeBlock(scope, inst.base.src); switch (op) { .eq => return self.addNewInstArgs( b, @@ -874,64 +1222,64 @@ const Analyze = struct { } else if (is_equality_cmp and ((lhs_ty_tag == .Null and rhs.ty.isCPtr()) or (rhs_ty_tag == .Null and lhs.ty.isCPtr()))) { - return self.fail(inst.base.src, "TODO implement C pointer cmp", .{}); + return self.fail(scope, inst.base.src, "TODO implement C pointer cmp", .{}); } else if (lhs_ty_tag == .Null or rhs_ty_tag == .Null) { const non_null_type = if (lhs_ty_tag == .Null) rhs.ty else lhs.ty; - return self.fail(inst.base.src, "comparison of '{}' with null", .{non_null_type}); + return self.fail(scope, inst.base.src, "comparison of '{}' with null", .{non_null_type}); } else if (is_equality_cmp and ((lhs_ty_tag == .EnumLiteral and rhs_ty_tag == .Union) or (rhs_ty_tag == .EnumLiteral and lhs_ty_tag == .Union))) { - return self.fail(inst.base.src, "TODO implement equality comparison between a union's tag value and an enum literal", .{}); + return self.fail(scope, inst.base.src, "TODO implement equality comparison between a union's tag value and an enum literal", .{}); } else if (lhs_ty_tag == .ErrorSet and rhs_ty_tag == .ErrorSet) { if (!is_equality_cmp) { - return self.fail(inst.base.src, "{} operator not allowed for errors", .{@tagName(op)}); + return self.fail(scope, inst.base.src, "{} operator not allowed for errors", .{@tagName(op)}); } - return self.fail(inst.base.src, "TODO implement equality comparison between errors", .{}); + return self.fail(scope, inst.base.src, "TODO implement equality comparison between errors", .{}); } else if (lhs.ty.isNumeric() and rhs.ty.isNumeric()) { // This operation allows any combination of integer and float types, regardless of the // signed-ness, comptime-ness, and bit-width. So peer type resolution is incorrect for // numeric types. 
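             // For example, a u8 may be compared with an i64 even though neither type can
             // represent the other's full range of values; cmpNumeric below derives a common
             // comparison type instead of coercing one operand to the other's type.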
- return self.cmpNumeric(block, inst.base.src, lhs, rhs, op); + return self.cmpNumeric(scope, inst.base.src, lhs, rhs, op); } - return self.fail(inst.base.src, "TODO implement more cmp analysis", .{}); + return self.fail(scope, inst.base.src, "TODO implement more cmp analysis", .{}); } - fn analyzeInstIsNull(self: *Analyze, block: ?*Block, inst: *text.Inst.IsNull) InnerError!*Inst { - const operand = try self.resolveInst(block, inst.positionals.operand); - return self.analyzeIsNull(block, inst.base.src, operand, true); + fn analyzeInstIsNull(self: *Module, scope: *Scope, inst: *text.Inst.IsNull) InnerError!*Inst { + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.analyzeIsNull(scope, inst.base.src, operand, true); } - fn analyzeInstIsNonNull(self: *Analyze, block: ?*Block, inst: *text.Inst.IsNonNull) InnerError!*Inst { - const operand = try self.resolveInst(block, inst.positionals.operand); - return self.analyzeIsNull(block, inst.base.src, operand, false); + fn analyzeInstIsNonNull(self: *Module, scope: *Scope, inst: *text.Inst.IsNonNull) InnerError!*Inst { + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.analyzeIsNull(scope, inst.base.src, operand, false); } - fn analyzeInstCondBr(self: *Analyze, block: ?*Block, inst: *text.Inst.CondBr) InnerError!*Inst { - const uncasted_cond = try self.resolveInst(block, inst.positionals.condition); - const cond = try self.coerce(block, Type.initTag(.bool), uncasted_cond); + fn analyzeInstCondBr(self: *Module, scope: *Scope, inst: *text.Inst.CondBr) InnerError!*Inst { + const uncasted_cond = try self.resolveInst(scope, inst.positionals.condition); + const cond = try self.coerce(scope, Type.initTag(.bool), uncasted_cond); if (try self.resolveDefinedValue(cond)) |cond_val| { const body = if (cond_val.toBool()) &inst.positionals.true_body else &inst.positionals.false_body; - try self.analyzeBody(block, body.*); + try self.analyzeBody(scope, body.*); return self.constVoid(inst.base.src); } - const parent_block = try self.requireRuntimeBlock(block, inst.base.src); + const parent_block = try self.requireRuntimeBlock(scope, inst.base.src); - var true_block: Block = .{ + var true_block: Scope.Block = .{ .func = parent_block.func, - .instructions = std.ArrayList(*Inst).init(self.allocator), + .instructions = .{}, }; defer true_block.instructions.deinit(); - try self.analyzeBody(&true_block, inst.positionals.true_body); + try self.analyzeBody(&true_block.base, inst.positionals.true_body); - var false_block: Block = .{ + var false_block: Scope.Block = .{ .func = parent_block.func, - .instructions = std.ArrayList(*Inst).init(self.allocator), + .instructions = .{}, }; defer false_block.instructions.deinit(); - try self.analyzeBody(&false_block, inst.positionals.false_body); + try self.analyzeBody(&false_block.base, inst.positionals.false_body); // Copy the instruction pointers to the arena memory const true_instructions = try self.arena.allocator.alloc(*Inst, true_block.instructions.items.len); @@ -947,7 +1295,7 @@ const Analyze = struct { }); } - fn wantSafety(self: *Analyze, block: ?*Block) bool { + fn wantSafety(self: *Module, scope: *Scope) bool { return switch (self.optimize_mode) { .Debug => true, .ReleaseSafe => true, @@ -956,47 +1304,47 @@ const Analyze = struct { }; } - fn analyzeInstUnreachable(self: *Analyze, block: ?*Block, unreach: *text.Inst.Unreachable) InnerError!*Inst { - const b = try self.requireRuntimeBlock(block, unreach.base.src); - if (self.wantSafety(block)) { + fn 
analyzeInstUnreachable(self: *Module, scope: *Scope, unreach: *text.Inst.Unreachable) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, unreach.base.src); + if (self.wantSafety(scope)) { // TODO Once we have a panic function to call, call it here instead of this. _ = try self.addNewInstArgs(b, unreach.base.src, Type.initTag(.void), Inst.Breakpoint, {}); } return self.addNewInstArgs(b, unreach.base.src, Type.initTag(.noreturn), Inst.Unreach, {}); } - fn analyzeInstRet(self: *Analyze, block: ?*Block, inst: *text.Inst.Return) InnerError!*Inst { - const b = try self.requireRuntimeBlock(block, inst.base.src); + fn analyzeInstRet(self: *Module, scope: *Scope, inst: *text.Inst.Return) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, inst.base.src); return self.addNewInstArgs(b, inst.base.src, Type.initTag(.noreturn), Inst.Ret, {}); } - fn analyzeBody(self: *Analyze, block: ?*Block, body: text.Module.Body) !void { + fn analyzeBody(self: *Module, scope: *Scope, body: text.Module.Body) !void { for (body.instructions) |src_inst| { - const new_inst = self.analyzeInst(block, src_inst) catch |err| { - if (block) |b| { + const new_inst = self.analyzeInst(scope, src_inst) catch |err| { + if (scope.cast(Scope.Block)) |b| { self.fns.items[b.func.fn_index].analysis_status = .failure; try b.func.inst_table.putNoClobber(src_inst, .{ .ptr = null }); } return err; }; - if (block) |b| try b.func.inst_table.putNoClobber(src_inst, .{ .ptr = new_inst }); + if (scope.cast(Scope.Block)) |b| try b.func.inst_table.putNoClobber(src_inst, .{ .ptr = new_inst }); } } fn analyzeIsNull( - self: *Analyze, - block: ?*Block, + self: *Module, + scope: *Scope, src: usize, operand: *Inst, invert_logic: bool, ) InnerError!*Inst { - return self.fail(src, "TODO implement analysis of isnull and isnotnull", .{}); + return self.fail(scope, src, "TODO implement analysis of isnull and isnotnull", .{}); } /// Asserts that lhs and rhs types are both numeric. fn cmpNumeric( - self: *Analyze, - block: ?*Block, + self: *Module, + scope: *Scope, src: usize, lhs: *Inst, rhs: *Inst, @@ -1010,14 +1358,14 @@ const Analyze = struct { if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) { if (lhs.ty.arrayLen() != rhs.ty.arrayLen()) { - return self.fail(src, "vector length mismatch: {} and {}", .{ + return self.fail(scope, src, "vector length mismatch: {} and {}", .{ lhs.ty.arrayLen(), rhs.ty.arrayLen(), }); } - return self.fail(src, "TODO implement support for vectors in cmpNumeric", .{}); + return self.fail(scope, src, "TODO implement support for vectors in cmpNumeric", .{}); } else if (lhs_ty_tag == .Vector or rhs_ty_tag == .Vector) { - return self.fail(src, "mixed scalar and vector operands to comparison operator: '{}' and '{}'", .{ + return self.fail(scope, src, "mixed scalar and vector operands to comparison operator: '{}' and '{}'", .{ lhs.ty, rhs.ty, }); @@ -1036,7 +1384,7 @@ const Analyze = struct { // of this function if we don't need to. // It must be a runtime comparison. - const b = try self.requireRuntimeBlock(block, src); + const b = try self.requireRuntimeBlock(scope, src); // For floats, emit a float comparison instruction. 
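         // The comparison type is the wider of the two float types, so no operand loses
         // precision; a comptime_float operand simply adopts the other operand's float type.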
const lhs_is_float = switch (lhs_ty_tag) {
             .Float, .ComptimeFloat => true,
@@ -1054,14 +1402,14 @@ const Analyze = struct {
             } else if (rhs_ty_tag == .ComptimeFloat) {
                 break :x lhs.ty;
             }
-            if (lhs.ty.floatBits(self.target) >= rhs.ty.floatBits(self.target)) {
+            if (lhs.ty.floatBits(self.target()) >= rhs.ty.floatBits(self.target())) {
                 break :x lhs.ty;
             } else {
                 break :x rhs.ty;
             }
         };
-        const casted_lhs = try self.coerce(block, dest_type, lhs);
-        const casted_rhs = try self.coerce(block, dest_type, rhs);
+        const casted_lhs = try self.coerce(scope, dest_type, lhs);
+        const casted_rhs = try self.coerce(scope, dest_type, rhs);
         return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){
             .lhs = casted_lhs,
             .rhs = casted_rhs,
@@ -1117,7 +1465,7 @@ const Analyze = struct {
         } else if (lhs_is_float) {
             dest_float_type = lhs.ty;
         } else {
-            const int_info = lhs.ty.intInfo(self.target);
+            const int_info = lhs.ty.intInfo(self.target());
             lhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed);
         }
 
@@ -1152,19 +1500,19 @@ const Analyze = struct {
         } else if (rhs_is_float) {
             dest_float_type = rhs.ty;
         } else {
-            const int_info = rhs.ty.intInfo(self.target);
+            const int_info = rhs.ty.intInfo(self.target());
             rhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed);
         }
 
         const dest_type = if (dest_float_type) |ft| ft else blk: {
             const max_bits = std.math.max(lhs_bits, rhs_bits);
             const casted_bits = std.math.cast(u16, max_bits) catch |err| switch (err) {
-                error.Overflow => return self.fail(src, "{} exceeds maximum integer bit count", .{max_bits}),
+                error.Overflow => return self.fail(scope, src, "{} exceeds maximum integer bit count", .{max_bits}),
             };
             break :blk try self.makeIntType(dest_int_is_signed, casted_bits);
         };
-        const casted_lhs = try self.coerce(block, dest_type, lhs);
-        const casted_rhs = try self.coerce(block, dest_type, lhs);
+        const casted_lhs = try self.coerce(scope, dest_type, lhs);
+        const casted_rhs = try self.coerce(scope, dest_type, rhs);
 
         return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){
             .lhs = casted_lhs,
@@ -1173,7 +1521,7 @@ const Analyze = struct {
         });
     }
 
-    fn makeIntType(self: *Analyze, signed: bool, bits: u16) !Type {
+    fn makeIntType(self: *Module, signed: bool, bits: u16) !Type {
         if (signed) {
             const int_payload = try self.arena.allocator.create(Type.Payload.IntSigned);
             int_payload.* = .{ .bits = bits };
@@ -1185,14 +1533,14 @@ const Analyze = struct {
         }
     }
 
-    fn coerce(self: *Analyze, block: ?*Block, dest_type: Type, inst: *Inst) !*Inst {
+    fn coerce(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
         // If the types are the same, we can return the operand.
         if (dest_type.eql(inst.ty))
             return inst;
 
         const in_memory_result = coerceInMemoryAllowed(dest_type, inst.ty);
         if (in_memory_result == .ok) {
-            return self.bitcast(block, dest_type, inst);
+            return self.bitcast(scope, dest_type, inst);
         }
 
         // *[N]T to []T
@@ -1212,55 +1560,61 @@ const Analyze = struct {
         if (inst.ty.zigTypeTag() == .ComptimeInt and dest_type.zigTypeTag() == .Int) {
             // The representation is already correct; we only need to make sure it fits in the destination type. 
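             // For example, coercing the comptime integer 300 to u8 must fail here, while
             // coercing 255 succeeds and merely re-types the already-correct value.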
const val = inst.value().?; // comptime_int always has comptime known value
-            if (!val.intFitsInType(dest_type, self.target)) {
-                return self.fail(inst.src, "type {} cannot represent integer value {}", .{ inst.ty, val });
+            if (!val.intFitsInType(dest_type, self.target())) {
+                return self.fail(scope, inst.src, "type {} cannot represent integer value {}", .{ inst.ty, val });
             }
             return self.constInst(inst.src, .{ .ty = dest_type, .val = val });
         }
 
         // integer widening
         if (inst.ty.zigTypeTag() == .Int and dest_type.zigTypeTag() == .Int) {
-            const src_info = inst.ty.intInfo(self.target);
-            const dst_info = dest_type.intInfo(self.target);
+            const src_info = inst.ty.intInfo(self.target());
+            const dst_info = dest_type.intInfo(self.target());
             if (src_info.signed == dst_info.signed and dst_info.bits >= src_info.bits) {
                 if (inst.value()) |val| {
                     return self.constInst(inst.src, .{ .ty = dest_type, .val = val });
                 } else {
-                    return self.fail(inst.src, "TODO implement runtime integer widening", .{});
+                    return self.fail(scope, inst.src, "TODO implement runtime integer widening", .{});
                 }
             } else {
-                return self.fail(inst.src, "TODO implement more int widening {} to {}", .{ inst.ty, dest_type });
+                return self.fail(scope, inst.src, "TODO implement more int widening {} to {}", .{ inst.ty, dest_type });
             }
         }
 
-        return self.fail(inst.src, "TODO implement type coercion from {} to {}", .{ inst.ty, dest_type });
+        return self.fail(scope, inst.src, "TODO implement type coercion from {} to {}", .{ inst.ty, dest_type });
     }
 
-    fn bitcast(self: *Analyze, block: ?*Block, dest_type: Type, inst: *Inst) !*Inst {
+    fn bitcast(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
         if (inst.value()) |val| {
             // Keep the comptime Value representation; take the new type.
             return self.constInst(inst.src, .{ .ty = dest_type, .val = val });
         }
         // TODO validate the type size and other compile errors
-        const b = try self.requireRuntimeBlock(block, inst.src);
+        const b = try self.requireRuntimeBlock(scope, inst.src);
         return self.addNewInstArgs(b, inst.src, dest_type, Inst.BitCast, Inst.Args(Inst.BitCast){ .operand = inst });
     }
 
-    fn coerceArrayPtrToSlice(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst {
+    fn coerceArrayPtrToSlice(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
         if (inst.value()) |val| {
             // The comptime Value representation is compatible with both types. 
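             // A comptime pointer-to-array value can therefore be viewed as a slice without
             // rewriting its payload; only the type of the constant changes.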
return self.constInst(inst.src, .{ .ty = dest_type, .val = val });
         }
-        return self.fail(inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{});
+        return self.fail(scope, inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{});
     }
 
-    fn fail(self: *Analyze, src: usize, comptime format: []const u8, args: var) InnerError {
+    fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError {
         @setCold(true);
-        const msg = try std.fmt.allocPrint(&self.arena.allocator, format, args);
-        (try self.errors.addOne()).* = .{
+        const err_msg = ErrorMsg{
             .byte_offset = src,
-            .msg = msg,
+            .msg = try std.fmt.allocPrint(self.allocator, format, args),
         };
+        if (scope.cast(Scope.Block)) |block| {
+            block.func.analysis = .{ .failure = err_msg };
+        } else if (scope.cast(Scope.DeclAnalysis)) |scope_decl| {
+            scope_decl.decl.analysis = .{ .failure = err_msg };
+        } else {
+            unreachable;
+        }
         return error.AnalysisFail;
     }
 
@@ -1279,6 +1633,11 @@ const Analyze = struct {
     }
 };
 
+pub const ErrorMsg = struct {
+    byte_offset: usize,
+    msg: []const u8,
+};
+
 pub fn main() anyerror!void {
     var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
     defer arena.deinit();
@@ -1288,63 +1647,68 @@ pub fn main() anyerror!void {
     defer std.process.argsFree(allocator, args);
 
     const src_path = args[1];
+    const bin_path = args[2];
     const debug_error_trace = true;
-
-    const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0);
-    defer allocator.free(source);
-
-    var zir_module = try text.parse(allocator, source);
-    defer zir_module.deinit(allocator);
-
-    if (zir_module.errors.len != 0) {
-        for (zir_module.errors) |err_msg| {
-            const loc = std.zig.findLineColumn(source, err_msg.byte_offset);
-            std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
-        }
-        if (debug_error_trace) return error.ParseFailure;
-        std.process.exit(1);
-    }
+    const output_zir = true;
 
     const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{});
 
-    var analyzed_module = try analyze(allocator, zir_module, .{
+    var bin_file = try link.openBinFilePath(allocator, std.fs.cwd(), bin_path, .{
         .target = native_info.target,
-        .output_mode = .Obj,
+        .output_mode = .Exe,
         .link_mode = .Static,
-        .optimize_mode = .Debug,
+        .object_format = native_info.target.getObjectFormat(),
     });
-    defer analyzed_module.deinit(allocator);
+    defer bin_file.deinit();
 
-    if (analyzed_module.errors.len != 0) {
-        for (analyzed_module.errors) |err_msg| {
-            const loc = std.zig.findLineColumn(source, err_msg.byte_offset);
-            std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
+    var module = blk: {
+        const root_pkg = try Package.create(allocator, std.fs.cwd(), ".", src_path);
+        errdefer root_pkg.destroy();
+
+        const root_scope = try allocator.create(Module.Scope.ZIRModule);
+        errdefer allocator.destroy(root_scope);
+        root_scope.* = .{
+            .sub_file_path = root_pkg.root_src_path,
+            .contents = .unloaded,
+        };
+
+        break :blk Module{
+            .allocator = allocator,
+            .root_pkg = root_pkg,
+            .root_scope = root_scope,
+            .bin_file = &bin_file,
+            .optimize_mode = .Debug,
+            .decl_table = std.AutoHashMap(Decl.Hash, *Decl).init(allocator),
+        };
+    };
+    defer module.deinit();
+
+    try module.update();
+
+    const errors = try module.getAllErrorsAlloc();
+    defer errors.deinit();
+
+    if (errors.list.len != 0) {
+        for (errors.list) |full_err_msg| {
+            std.debug.warn("{}:{}:{}: 
error: {}\n", .{ + full_err_msg.src_path, + full_err_msg.line + 1, + full_err_msg.column + 1, + full_err_msg.msg, + }); } if (debug_error_trace) return error.AnalysisFail; std.process.exit(1); } - const output_zir = true; if (output_zir) { - var new_zir_module = try text.emit_zir(allocator, analyzed_module); + var new_zir_module = try text.emit_zir(allocator, module); defer new_zir_module.deinit(allocator); var bos = std.io.bufferedOutStream(std.io.getStdOut().outStream()); try new_zir_module.writeToStream(allocator, bos.outStream()); try bos.flush(); } - - const link = @import("link.zig"); - var result = try link.updateFilePath(allocator, analyzed_module, std.fs.cwd(), "zir.o"); - defer result.deinit(allocator); - if (result.errors.len != 0) { - for (result.errors) |err_msg| { - const loc = std.zig.findLineColumn(source, err_msg.byte_offset); - std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); - } - if (debug_error_trace) return error.LinkFailure; - std.process.exit(1); - } } // Performance optimization ideas: diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index e1efb40fe5..762eb07b42 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -16,10 +16,16 @@ pub const Inst = struct { tag: Tag, /// Byte offset into the source. src: usize, + name: []const u8, /// These names are used directly as the instruction names in the text format. pub const Tag = enum { breakpoint, + call, + /// Represents a reference to a global decl by name. + /// Canonicalized ZIR will not have any of these. The + /// syntax `@foo` is equivalent to `declref("foo")`. + declref, str, int, ptrtoint, @@ -46,6 +52,8 @@ pub const Inst = struct { pub fn TagToType(tag: Tag) type { return switch (tag) { .breakpoint => Breakpoint, + .call => Call, + .declref => DeclRef, .str => Str, .int => Int, .ptrtoint => PtrToInt, @@ -85,6 +93,29 @@ pub const Inst = struct { kw_args: struct {}, }; + pub const Call = struct { + pub const base_tag = Tag.call; + base: Inst, + + positionals: struct { + func: *Inst, + args: []*Inst, + }, + kw_args: struct { + modifier: std.builtin.CallOptions.Modifier = .auto, + }, + }; + + pub const DeclRef = struct { + pub const base_tag = Tag.declref; + base: Inst, + + positionals: struct { + name: *Inst, + }, + kw_args: struct {}, + }; + pub const Str = struct { pub const base_tag = Tag.str; base: Inst, @@ -212,55 +243,55 @@ pub const Inst = struct { kw_args: struct {}, pub const BuiltinType = enum { - @"isize", - @"usize", - @"c_short", - @"c_ushort", - @"c_int", - @"c_uint", - @"c_long", - @"c_ulong", - @"c_longlong", - @"c_ulonglong", - @"c_longdouble", - @"c_void", - @"f16", - @"f32", - @"f64", - @"f128", - @"bool", - @"void", - @"noreturn", - @"type", - @"anyerror", - @"comptime_int", - @"comptime_float", + isize, + usize, + c_short, + c_ushort, + c_int, + c_uint, + c_long, + c_ulong, + c_longlong, + c_ulonglong, + c_longdouble, + c_void, + f16, + f32, + f64, + f128, + bool, + void, + noreturn, + type, + anyerror, + comptime_int, + comptime_float, fn toType(self: BuiltinType) Type { return switch (self) { - .@"isize" => Type.initTag(.@"isize"), - .@"usize" => Type.initTag(.@"usize"), - .@"c_short" => Type.initTag(.@"c_short"), - .@"c_ushort" => Type.initTag(.@"c_ushort"), - .@"c_int" => Type.initTag(.@"c_int"), - .@"c_uint" => Type.initTag(.@"c_uint"), - .@"c_long" => Type.initTag(.@"c_long"), - .@"c_ulong" => Type.initTag(.@"c_ulong"), - .@"c_longlong" => Type.initTag(.@"c_longlong"), - .@"c_ulonglong" => 
Type.initTag(.@"c_ulonglong"), - .@"c_longdouble" => Type.initTag(.@"c_longdouble"), - .@"c_void" => Type.initTag(.@"c_void"), - .@"f16" => Type.initTag(.@"f16"), - .@"f32" => Type.initTag(.@"f32"), - .@"f64" => Type.initTag(.@"f64"), - .@"f128" => Type.initTag(.@"f128"), - .@"bool" => Type.initTag(.@"bool"), - .@"void" => Type.initTag(.@"void"), - .@"noreturn" => Type.initTag(.@"noreturn"), - .@"type" => Type.initTag(.@"type"), - .@"anyerror" => Type.initTag(.@"anyerror"), - .@"comptime_int" => Type.initTag(.@"comptime_int"), - .@"comptime_float" => Type.initTag(.@"comptime_float"), + .isize => Type.initTag(.isize), + .usize => Type.initTag(.usize), + .c_short => Type.initTag(.c_short), + .c_ushort => Type.initTag(.c_ushort), + .c_int => Type.initTag(.c_int), + .c_uint => Type.initTag(.c_uint), + .c_long => Type.initTag(.c_long), + .c_ulong => Type.initTag(.c_ulong), + .c_longlong => Type.initTag(.c_longlong), + .c_ulonglong => Type.initTag(.c_ulonglong), + .c_longdouble => Type.initTag(.c_longdouble), + .c_void => Type.initTag(.c_void), + .f16 => Type.initTag(.f16), + .f32 => Type.initTag(.f32), + .f64 => Type.initTag(.f64), + .f128 => Type.initTag(.f128), + .bool => Type.initTag(.bool), + .void => Type.initTag(.void), + .noreturn => Type.initTag(.noreturn), + .type => Type.initTag(.type), + .anyerror => Type.initTag(.anyerror), + .comptime_int => Type.initTag(.comptime_int), + .comptime_float => Type.initTag(.comptime_float), }; } }; @@ -376,7 +407,7 @@ pub const ErrorMsg = struct { pub const Module = struct { decls: []*Inst, errors: []ErrorMsg, - arena: std.heap.ArenaAllocator, + arena: std.heap.ArenaAllocator.State, pub const Body = struct { instructions: []*Inst, @@ -385,7 +416,7 @@ pub const Module = struct { pub fn deinit(self: *Module, allocator: *Allocator) void { allocator.free(self.decls); allocator.free(self.errors); - self.arena.deinit(); + self.arena.promote(allocator).deinit(); self.* = undefined; } @@ -431,6 +462,7 @@ pub const Module = struct { // TODO I tried implementing this with an inline for loop and hit a compiler bug switch (decl.tag) { .breakpoint => return self.writeInstToStreamGeneric(stream, .breakpoint, decl, inst_table), + .call => return self.writeInstToStreamGeneric(stream, .call, decl, inst_table), .str => return self.writeInstToStreamGeneric(stream, .str, decl, inst_table), .int => return self.writeInstToStreamGeneric(stream, .int, decl, inst_table), .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table), @@ -543,9 +575,9 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module .arena = std.heap.ArenaAllocator.init(allocator), .i = 0, .source = source, - .decls = std.ArrayList(*Inst).init(allocator), - .errors = std.ArrayList(ErrorMsg).init(allocator), .global_name_map = &global_name_map, + .errors = .{}, + .decls = .{}, }; errdefer parser.arena.deinit(); @@ -555,10 +587,11 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module }, else => |e| return e, }; + return Module{ - .decls = parser.decls.toOwnedSlice(), - .errors = parser.errors.toOwnedSlice(), - .arena = parser.arena, + .decls = parser.decls.toOwnedSlice(allocator), + .errors = parser.errors.toOwnedSlice(allocator), + .arena = parser.arena.state, }; } @@ -567,8 +600,8 @@ const Parser = struct { arena: std.heap.ArenaAllocator, i: usize, source: [:0]const u8, - errors: std.ArrayList(ErrorMsg), - decls: std.ArrayList(*Inst), + errors: std.ArrayListUnmanaged(ErrorMsg), + decls: std.ArrayListUnmanaged(*Inst), 
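+    // Both lists are unmanaged: `parse` threads its general-purpose allocator through
+    // explicitly (see `toOwnedSlice(allocator)` below), which keeps the Parser struct small.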
global_name_map: *std.StringHashMap(usize),
 
     const Body = struct {
@@ -893,8 +926,25 @@ const Parser = struct {
             const ident = self.source[name_start..self.i];
             const kv = map.get(ident) orelse {
                 const bad_name = self.source[name_start - 1 .. self.i];
-                self.i = name_start - 1;
-                return self.fail("unrecognized identifier: {}", .{bad_name});
+                const src = name_start - 1;
+                if (local_ref) {
+                    self.i = src;
+                    return self.fail("unrecognized identifier: {}", .{bad_name});
+                } else {
+                    const name = try self.arena.allocator.create(Inst.Str);
+                    name.* = .{
+                        .base = .{ .src = src, .tag = Inst.Str.base_tag },
+                        .positionals = .{ .bytes = ident },
+                        .kw_args = .{},
+                    };
+                    const declref = try self.arena.allocator.create(Inst.DeclRef);
+                    declref.* = .{
+                        .base = .{ .src = src, .tag = Inst.DeclRef.base_tag },
+                        .positionals = .{ .name = &name.base },
+                        .kw_args = .{},
+                    };
+                    return &declref.base;
+                }
             };
             if (local_ref) {
                 return body_ctx.?.instructions.items[kv.value];
@@ -1065,6 +1115,24 @@ const EmitZIR = struct {
         for (body.instructions) |inst| {
             const new_inst = switch (inst.tag) {
                 .breakpoint => try self.emitTrivial(inst.src, Inst.Breakpoint),
+                .call => blk: {
+                    const old_inst = inst.cast(ir.Inst.Call).?;
+                    const new_inst = try self.arena.allocator.create(Inst.Call);
+
+                    const args = try self.arena.allocator.alloc(*Inst, old_inst.args.args.len);
+                    for (args) |*elem, i| {
+                        elem.* = try self.resolveInst(inst_table, old_inst.args.args[i]);
+                    }
+                    new_inst.* = .{
+                        .base = .{ .src = inst.src, .tag = Inst.Call.base_tag },
+                        .positionals = .{
+                            .func = try self.resolveInst(inst_table, old_inst.args.func),
+                            .args = args,
+                        },
+                        .kw_args = .{},
+                    };
+                    break :blk &new_inst.base;
+                },
                 .unreach => try self.emitTrivial(inst.src, Inst.Unreachable),
                 .ret => try self.emitTrivial(inst.src, Inst.Return),
                 .constant => unreachable, // excluded from function bodies
diff --git a/src-self-hosted/libc_installation.zig b/src-self-hosted/libc_installation.zig
index 65e5776422..dfc0f1235a 100644
--- a/src-self-hosted/libc_installation.zig
+++ b/src-self-hosted/libc_installation.zig
@@ -1,6 +1,5 @@
 const std = @import("std");
 const builtin = @import("builtin");
-const util = @import("util.zig");
 const Target = std.Target;
 const fs = std.fs;
 const Allocator = std.mem.Allocator;
diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig
index 504c374ca7..c9e87d4092 100644
--- a/src-self-hosted/link.zig
+++ b/src-self-hosted/link.zig
@@ -9,50 +9,65 @@ const codegen = @import("codegen.zig");
 
 const default_entry_addr = 0x8000000;
 
-pub const ErrorMsg = struct {
-    byte_offset: usize,
-    msg: []const u8,
-};
-
-pub const Result = struct {
-    errors: []ErrorMsg,
-
-    pub fn deinit(self: *Result, allocator: *mem.Allocator) void {
-        for (self.errors) |err| {
-            allocator.free(err.msg);
-        }
-        allocator.free(self.errors);
-        self.* = undefined;
-    }
+pub const Options = struct {
+    target: std.Target,
+    output_mode: std.builtin.OutputMode,
+    link_mode: std.builtin.LinkMode,
+    object_format: std.builtin.ObjectFormat,
+    /// Used for calculating how much space to reserve for symbols in case the binary file
+    /// does not already have a symbol table.
+    symbol_count_hint: u64 = 32,
+    /// Used for calculating how much space to reserve for executable program code in case
+    /// the binary file does not already have such a section.
+    program_code_size_hint: u64 = 256 * 1024,
 };
 
 /// Attempts incremental linking, if the file already exists.
 /// If incremental linking fails, falls back to truncating the file and rewriting it.
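 /// Here, "incremental" means reusing the existing file layout where possible: previously
 /// allocated file offsets are kept, and only changed blobs and dirty headers are rewritten.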
/// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. /// This operation is not atomic. -pub fn updateFilePath( +pub fn openBinFilePath( allocator: *Allocator, - module: ir.Module, dir: fs.Dir, sub_path: []const u8, -) !Result { - const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = determineMode(module) }); + options: Options, +) !ElfFile { + const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = determineMode(options) }); defer file.close(); - return updateFile(allocator, module, file); + return openBinFile(allocator, file, options); } /// Atomically overwrites the old file, if present. pub fn writeFilePath( allocator: *Allocator, - module: ir.Module, dir: fs.Dir, sub_path: []const u8, -) !Result { - const af = try dir.atomicFile(sub_path, .{ .mode = determineMode(module) }); + module: ir.Module, + errors: *std.ArrayList(ir.ErrorMsg), +) !void { + const options: Options = .{ + .target = module.target, + .output_mode = module.output_mode, + .link_mode = module.link_mode, + .object_format = module.object_format, + .symbol_count_hint = module.decls.items.len, + }; + const af = try dir.atomicFile(sub_path, .{ .mode = determineMode(options) }); defer af.deinit(); - const result = try writeFile(allocator, module, af.file); + const elf_file = try createElfFile(allocator, af.file, options); + for (module.decls.items) |decl| { + try elf_file.updateDecl(module, decl, errors); + } + try elf_file.flush(); + if (elf_file.error_flags.no_entry_point_found) { + try errors.ensureCapacity(errors.items.len + 1); + errors.appendAssumeCapacity(.{ + .byte_offset = 0, + .msg = try std.fmt.allocPrint(errors.allocator, "no entry point found", .{}), + }); + } try af.finish(); return result; } @@ -62,49 +77,65 @@ pub fn writeFilePath( /// Returns an error if `file` is not already open with +read +write +seek abilities. /// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. /// This operation is not atomic. -pub fn updateFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { - return updateFileInner(allocator, module, file) catch |err| switch (err) { +pub fn openBinFile(allocator: *Allocator, file: fs.File, options: Options) !ElfFile { + return openBinFileInner(allocator, file, options) catch |err| switch (err) { error.IncrFailed => { - return writeFile(allocator, module, file); + return createElfFile(allocator, file, options); }, else => |e| return e, }; } -const Update = struct { +pub const ElfFile = struct { + allocator: *Allocator, file: fs.File, - module: *const ir.Module, + options: Options, + ptr_width: enum { p32, p64 }, /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. /// Same order as in the file. - sections: std.ArrayList(elf.Elf64_Shdr), - shdr_table_offset: ?u64, + sections: std.ArrayListUnmanaged(elf.Elf64_Shdr) = .{}, + shdr_table_offset: ?u64 = null, /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. /// Same order as in the file. 
-    program_headers: std.ArrayList(elf.Elf64_Phdr),
-    phdr_table_offset: ?u64,
+    program_headers: std.ArrayListUnmanaged(elf.Elf64_Phdr) = .{},
+    phdr_table_offset: ?u64 = null,
     /// The index into the program headers of a PT_LOAD program header with Read and Execute flags
-    phdr_load_re_index: ?u16,
-    entry_addr: ?u64,
+    phdr_load_re_index: ?u16 = null,
+    entry_addr: ?u64 = null,
 
-    shstrtab: std.ArrayList(u8),
-    shstrtab_index: ?u16,
+    shstrtab: std.ArrayListUnmanaged(u8) = .{},
+    shstrtab_index: ?u16 = null,
 
-    text_section_index: ?u16,
-    symtab_section_index: ?u16,
+    text_section_index: ?u16 = null,
+    symtab_section_index: ?u16 = null,
 
     /// The same order as in the file
-    symbols: std.ArrayList(elf.Elf64_Sym),
+    symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{},
 
-    errors: std.ArrayList(ErrorMsg),
+    /// Same order as in the file.
+    offset_table: std.ArrayListUnmanaged(u64) = .{},
 
-    fn deinit(self: *Update) void {
-        self.sections.deinit();
-        self.program_headers.deinit();
-        self.shstrtab.deinit();
-        self.symbols.deinit();
-        self.errors.deinit();
+    /// This means the entire read-only executable program code needs to be rewritten.
+    phdr_load_re_dirty: bool = false,
+    phdr_table_dirty: bool = false,
+    shdr_table_dirty: bool = false,
+    shstrtab_dirty: bool = false,
+    symtab_dirty: bool = false,
+
+    error_flags: ErrorFlags = ErrorFlags{},
+
+    pub const ErrorFlags = struct {
+        no_entry_point_found: bool = false,
+    };
+
+    pub fn deinit(self: *ElfFile) void {
+        self.sections.deinit(self.allocator);
+        self.program_headers.deinit(self.allocator);
+        self.shstrtab.deinit(self.allocator);
+        self.symbols.deinit(self.allocator);
+        self.offset_table.deinit(self.allocator);
     }
 
     // `alloc_num / alloc_den` is the factor of padding when allocating.
@@ -112,8 +143,8 @@ const Update = struct {
     const alloc_den = 3;
 
     /// Returns end pos of collision, if any. 
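    /// For example, if `start` lands inside the ELF header, the collision ends at
    /// `ehdr_size`; `findFreeSpace` then retries from that offset, suitably aligned.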
     /// Returns end pos of collision, if any.
-    fn detectAllocCollision(self: *Update, start: u64, size: u64) ?u64 {
-        const small_ptr = self.module.target.cpu.arch.ptrBitWidth() == 32;
+    fn detectAllocCollision(self: *ElfFile, start: u64, size: u64) ?u64 {
+        const small_ptr = self.options.target.cpu.arch.ptrBitWidth() == 32;
         const ehdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Ehdr) else @sizeOf(elf.Elf64_Ehdr);
         if (start < ehdr_size)
             return ehdr_size;
@@ -157,7 +188,7 @@ const Update = struct {
         return null;
     }
 
-    fn allocatedSize(self: *Update, start: u64) u64 {
+    fn allocatedSize(self: *ElfFile, start: u64) u64 {
         var min_pos: u64 = std.math.maxInt(u64);
         if (self.shdr_table_offset) |off| {
             if (off > start and off < min_pos) min_pos = off;
@@ -176,7 +207,7 @@ const Update = struct {
         return min_pos - start;
     }
 
-    fn findFreeSpace(self: *Update, object_size: u64, min_alignment: u16) u64 {
+    fn findFreeSpace(self: *ElfFile, object_size: u64, min_alignment: u16) u64 {
         var start: u64 = 0;
         while (self.detectAllocCollision(start, object_size)) |item_end| {
             start = mem.alignForwardGeneric(u64, item_end, min_alignment);
@@ -184,33 +215,21 @@ const Update = struct {
         return start;
     }
 
-    fn makeString(self: *Update, bytes: []const u8) !u32 {
+    fn makeString(self: *ElfFile, bytes: []const u8) !u32 {
         const result = self.shstrtab.items.len;
-        try self.shstrtab.appendSlice(bytes);
-        try self.shstrtab.append(0);
+        try self.shstrtab.appendSlice(self.allocator, bytes);
+        try self.shstrtab.append(self.allocator, 0);
         return @intCast(u32, result);
     }
 
-    fn perform(self: *Update) !void {
-        const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) {
-            32 => .p32,
-            64 => .p64,
-            else => return error.UnsupportedArchitecture,
-        };
-        const small_ptr = switch (ptr_width) {
+    pub fn populateMissingMetadata(self: *ElfFile) !void {
+        const small_ptr = switch (self.ptr_width) {
            .p32 => true,
            .p64 => false,
        };
 
-        // This means the entire read-only executable program code needs to be rewritten.
-        var phdr_load_re_dirty = false;
-        var phdr_table_dirty = false;
-        var shdr_table_dirty = false;
-        var shstrtab_dirty = false;
-        var symtab_dirty = false;
-
         if (self.phdr_load_re_index == null) {
             self.phdr_load_re_index = @intCast(u16, self.program_headers.items.len);
-            const file_size = 256 * 1024;
+            const file_size = self.options.program_code_size_hint;
             const p_align = 0x1000;
             const off = self.findFreeSpace(file_size, p_align);
             //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
@@ -225,24 +244,8 @@ const Update = struct {
                 .p_flags = elf.PF_X | elf.PF_R,
             });
             self.entry_addr = null;
-            phdr_load_re_dirty = true;
-            phdr_table_dirty = true;
-        }
-        if (self.sections.items.len == 0) {
-            // There must always be a null section in index 0
-            try self.sections.append(.{
-                .sh_name = 0,
-                .sh_type = elf.SHT_NULL,
-                .sh_flags = 0,
-                .sh_addr = 0,
-                .sh_offset = 0,
-                .sh_size = 0,
-                .sh_link = 0,
-                .sh_info = 0,
-                .sh_addralign = 0,
-                .sh_entsize = 0,
-            });
-            shdr_table_dirty = true;
+            self.phdr_load_re_dirty = true;
+            self.phdr_table_dirty = true;
         }
         if (self.shstrtab_index == null) {
             self.shstrtab_index = @intCast(u16, self.sections.items.len);
@@ -262,8 +265,8 @@ const Update = struct {
                 .sh_addralign = 1,
                 .sh_entsize = 0,
             });
-            shstrtab_dirty = true;
-            shdr_table_dirty = true;
+            self.shstrtab_dirty = true;
+            self.shdr_table_dirty = true;
         }
         if (self.text_section_index == null) {
             self.text_section_index = @intCast(u16, self.sections.items.len);
@@ -281,13 +284,13 @@ const Update = struct {
                 .sh_addralign = phdr.p_align,
                 .sh_entsize = 0,
             });
-            shdr_table_dirty = true;
+            self.shdr_table_dirty = true;
         }
         if (self.symtab_section_index == null) {
             self.symtab_section_index = @intCast(u16, self.sections.items.len);
             const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym);
             const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym);
-            const file_size = self.module.exports.len * each_size;
+            const file_size = self.options.symbol_count_hint * each_size;
             const off = self.findFreeSpace(file_size, min_align);
             //std.debug.warn("found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
 
@@ -300,12 +303,12 @@ const Update = struct {
                 .sh_size = file_size,
                 // The section header index of the associated string table.
                 .sh_link = self.shstrtab_index.?,
-                .sh_info = @intCast(u32, self.module.exports.len),
+                .sh_info = @intCast(u32, self.symbols.items.len),
                 .sh_addralign = min_align,
                 .sh_entsize = each_size,
             });
-            symtab_dirty = true;
-            shdr_table_dirty = true;
+            self.symtab_dirty = true;
+            self.shdr_table_dirty = true;
         }
-        const shsize: u64 = switch (ptr_width) {
+        const shsize: u64 = switch (self.ptr_width) {
             .p32 => @sizeOf(elf.Elf32_Shdr),
             .p64 => @sizeOf(elf.Elf64_Shdr),
@@ -317,7 +320,7 @@
         };
         if (self.shdr_table_offset == null) {
             self.shdr_table_offset = self.findFreeSpace(self.sections.items.len * shsize, shalign);
-            shdr_table_dirty = true;
+            self.shdr_table_dirty = true;
         }
-        const phsize: u64 = switch (ptr_width) {
+        const phsize: u64 = switch (self.ptr_width) {
             .p32 => @sizeOf(elf.Elf32_Phdr),
             .p64 => @sizeOf(elf.Elf64_Phdr),
@@ -329,13 +332,15 @@
         };
         if (self.phdr_table_offset == null) {
             self.phdr_table_offset = self.findFreeSpace(self.program_headers.items.len * phsize, phalign);
-            phdr_table_dirty = true;
+            self.phdr_table_dirty = true;
         }
-        const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+    }
 
-        try self.writeCodeAndSymbols(phdr_table_dirty, shdr_table_dirty);
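+    // populateMissingMetadata lays out any missing pieces and marks them
+    // dirty; the file writes themselves are deferred to flush(), which
+    // clears each dirty bit as it commits that piece.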
+    /// Commit pending changes and write headers.
+    pub fn flush(self: *ElfFile) !void {
+        const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+        const phsize: u64 = switch (self.ptr_width) {
+            .p32 => @sizeOf(elf.Elf32_Phdr),
+            .p64 => @sizeOf(elf.Elf64_Phdr),
+        };
+        const shsize: u64 = switch (self.ptr_width) {
+            .p32 => @sizeOf(elf.Elf32_Shdr),
+            .p64 => @sizeOf(elf.Elf64_Shdr),
+        };
 
-        if (phdr_table_dirty) {
+        if (self.phdr_table_dirty) {
             const allocated_size = self.allocatedSize(self.phdr_table_offset.?);
             const needed_size = self.program_headers.items.len * phsize;
 
@@ -345,7 +350,7 @@ const Update = struct {
             }
 
-            const allocator = self.program_headers.allocator;
-            switch (ptr_width) {
+            const allocator = self.allocator;
+            switch (self.ptr_width) {
                 .p32 => {
                     const buf = try allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len);
                     defer allocator.free(buf);
@@ -371,11 +376,12 @@ const Update = struct {
                     try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?);
                 },
             }
+            self.phdr_table_dirty = false;
         }
 
         {
             const shstrtab_sect = &self.sections.items[self.shstrtab_index.?];
-            if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) {
+            if (self.shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) {
                 const allocated_size = self.allocatedSize(shstrtab_sect.sh_offset);
                 const needed_size = self.shstrtab.items.len;
 
@@ -387,13 +393,14 @@ const Update = struct {
                 //std.debug.warn("shstrtab start=0x{x} end=0x{x}\n", .{ shstrtab_sect.sh_offset, shstrtab_sect.sh_offset + needed_size });
 
                 try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset);
-                if (!shdr_table_dirty) {
+                if (!self.shdr_table_dirty) {
                     // Then it won't get written with the others and we need to do it.
                     try self.writeSectHeader(self.shstrtab_index.?);
                 }
+                self.shstrtab_dirty = false;
             }
         }
-        if (shdr_table_dirty) {
+        if (self.shdr_table_dirty) {
             const allocated_size = self.allocatedSize(self.shdr_table_offset.?);
-            const needed_size = self.sections.items.len * phsize;
+            const needed_size = self.sections.items.len * shsize;
 
@@ -403,7 +410,7 @@ const Update = struct {
             }
 
-            const allocator = self.sections.allocator;
-            switch (ptr_width) {
+            const allocator = self.allocator;
+            switch (self.ptr_width) {
                 .p32 => {
                     const buf = try allocator.alloc(elf.Elf32_Shdr, self.sections.items.len);
                     defer allocator.free(buf);
@@ -431,38 +438,36 @@ const Update = struct {
                 },
             }
+            self.shdr_table_dirty = false;
         }
-        if (self.entry_addr == null and self.module.output_mode == .Exe) {
-            const msg = try std.fmt.allocPrint(self.errors.allocator, "no entry point found", .{});
-            errdefer self.errors.allocator.free(msg);
-            try self.errors.append(.{
-                .byte_offset = 0,
-                .msg = msg,
-            });
+        if (self.entry_addr == null and self.options.output_mode == .Exe) {
+            self.error_flags.no_entry_point_found = true;
         } else {
+            self.error_flags.no_entry_point_found = false;
             try self.writeElfHeader();
         }
         // TODO find end pos and truncate
+
+        // The point of flush() is to commit changes, so nothing should be dirty after this.
+ assert(!self.phdr_load_re_dirty); + assert(!self.phdr_table_dirty); + assert(!self.shdr_table_dirty); + assert(!self.shstrtab_dirty); + assert(!self.symtab_dirty); } - fn writeElfHeader(self: *Update) !void { + fn writeElfHeader(self: *ElfFile) !void { var hdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined; var index: usize = 0; hdr_buf[0..4].* = "\x7fELF".*; index += 4; - const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { - 32 => .p32, - 64 => .p64, - else => return error.UnsupportedArchitecture, - }; - hdr_buf[index] = switch (ptr_width) { + hdr_buf[index] = switch (self.ptr_width) { .p32 => elf.ELFCLASS32, .p64 => elf.ELFCLASS64, }; index += 1; - const endian = self.module.target.cpu.arch.endian(); + const endian = self.options.target.cpu.arch.endian(); hdr_buf[index] = switch (endian) { .Little => elf.ELFDATA2LSB, .Big => elf.ELFDATA2MSB, @@ -480,10 +485,10 @@ const Update = struct { assert(index == 16); - const elf_type = switch (self.module.output_mode) { + const elf_type = switch (self.options.output_mode) { .Exe => elf.ET.EXEC, .Obj => elf.ET.REL, - .Lib => switch (self.module.link_mode) { + .Lib => switch (self.options.link_mode) { .Static => elf.ET.REL, .Dynamic => elf.ET.DYN, }, @@ -491,7 +496,7 @@ const Update = struct { mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(elf_type), endian); index += 2; - const machine = self.module.target.cpu.arch.toElfMachine(); + const machine = self.options.target.cpu.arch.toElfMachine(); mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(machine), endian); index += 2; @@ -501,7 +506,7 @@ const Update = struct { const e_entry = if (elf_type == .REL) 0 else self.entry_addr.?; - switch (ptr_width) { + switch (self.ptr_width) { .p32 => { mem.writeInt(u32, hdr_buf[index..][0..4], @intCast(u32, e_entry), endian); index += 4; @@ -533,14 +538,14 @@ const Update = struct { mem.writeInt(u32, hdr_buf[index..][0..4], e_flags, endian); index += 4; - const e_ehsize: u16 = switch (ptr_width) { + const e_ehsize: u16 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Ehdr), .p64 => @sizeOf(elf.Elf64_Ehdr), }; mem.writeInt(u16, hdr_buf[index..][0..2], e_ehsize, endian); index += 2; - const e_phentsize: u16 = switch (ptr_width) { + const e_phentsize: u16 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Phdr), .p64 => @sizeOf(elf.Elf64_Phdr), }; @@ -551,7 +556,7 @@ const Update = struct { mem.writeInt(u16, hdr_buf[index..][0..2], e_phnum, endian); index += 2; - const e_shentsize: u16 = switch (ptr_width) { + const e_shentsize: u16 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Shdr), .p64 => @sizeOf(elf.Elf64_Shdr), }; @@ -570,81 +575,172 @@ const Update = struct { try self.file.pwriteAll(hdr_buf[0..index], 0); } - fn writeCodeAndSymbols(self: *Update, phdr_table_dirty: bool, shdr_table_dirty: bool) !void { - // index 0 is always a null symbol - try self.symbols.resize(1); - self.symbols.items[0] = .{ - .st_name = 0, - .st_info = 0, - .st_other = 0, - .st_shndx = 0, - .st_value = 0, - .st_size = 0, - }; + /// TODO Look into making this smaller to save memory. + /// Lots of redundant info here with the data stored in symbol structs. 
+ const DeclSymbol = struct { + symbol_indexes: []usize, + vaddr: u64, + file_offset: u64, + size: u64, + }; + const AllocatedBlock = struct { + vaddr: u64, + file_offset: u64, + size_capacity: u64, + }; + + fn allocateDeclSymbol(self: *ElfFile, size: u64) AllocatedBlock { const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; - var vaddr: u64 = phdr.p_vaddr; - var file_off: u64 = phdr.p_offset; + todo(); + //{ + // // Now that we know the code size, we need to update the program header for executable code + // phdr.p_memsz = vaddr - phdr.p_vaddr; + // phdr.p_filesz = phdr.p_memsz; - var code = std.ArrayList(u8).init(self.sections.allocator); - defer code.deinit(); + // const shdr = &self.sections.items[self.text_section_index.?]; + // shdr.sh_size = phdr.p_filesz; - for (self.module.exports) |exp| { - code.shrink(0); - var symbol = try codegen.generateSymbol(exp.typed_value, self.module.*, &code); - defer symbol.deinit(code.allocator); - if (symbol.errors.len != 0) { - for (symbol.errors) |err| { - const msg = try mem.dupe(self.errors.allocator, u8, err.msg); - errdefer self.errors.allocator.free(msg); - try self.errors.append(.{ - .byte_offset = err.byte_offset, - .msg = msg, - }); - } - continue; - } - try self.file.pwriteAll(code.items, file_off); + // self.phdr_table_dirty = true; // TODO look into making only the one program header dirty + // self.shdr_table_dirty = true; // TODO look into making only the one section dirty + //} - if (mem.eql(u8, exp.name, "_start")) { - self.entry_addr = vaddr; - } - (try self.symbols.addOne()).* = .{ - .st_name = try self.makeString(exp.name), - .st_info = (elf.STB_LOCAL << 4) | elf.STT_FUNC, - .st_other = 0, - .st_shndx = self.text_section_index.?, - .st_value = vaddr, - .st_size = code.items.len, - }; - vaddr += code.items.len; - } - - { - // Now that we know the code size, we need to update the program header for executable code - phdr.p_memsz = vaddr - phdr.p_vaddr; - phdr.p_filesz = phdr.p_memsz; - - const shdr = &self.sections.items[self.text_section_index.?]; - shdr.sh_size = phdr.p_filesz; - - if (!phdr_table_dirty) { - // Then it won't get written with the others and we need to do it. - try self.writeProgHeader(self.phdr_load_re_index.?); - } - if (!shdr_table_dirty) { - // Then it won't get written with the others and we need to do it. 
-            try self.writeSectHeader(self.text_section_index.?);
-            }
-        }
-
-        return self.writeSymbols();
+        //return self.writeSymbols();
     }
 
-    fn writeProgHeader(self: *Update, index: usize) !void {
-        const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+    fn findAllocatedBlock(self: *ElfFile, vaddr: u64) AllocatedBlock {
+        todo();
+    }
+
+    pub fn updateDecl(
+        self: *ElfFile,
+        module: ir.Module,
+        typed_value: ir.TypedValue,
+        decl_export_node: ?*std.LinkedList(std.builtin.ExportOptions).Node,
+        hash: ir.Module.Decl.Hash,
+        err_msg_allocator: *Allocator,
+    ) !?ir.ErrorMsg {
+        var code = std.ArrayList(u8).init(self.allocator);
+        defer code.deinit();
+
+        const err_msg = try codegen.generateSymbol(typed_value, module, &code, err_msg_allocator);
+        if (err_msg) |em| return em;
+
+        const export_count = blk: {
+            var export_node = decl_export_node;
+            var i: usize = 0;
+            while (export_node) |node| : (export_node = node.next) i += 1;
+            break :blk i;
+        };
+
+        // Find or create a symbol from the decl
+        var valid_sym_index_len: usize = 0;
+        const decl_symbol = blk: {
+            if (self.decl_table.getValue(hash)) |decl_symbol| {
+                valid_sym_index_len = decl_symbol.symbol_indexes.len;
+                decl_symbol.symbol_indexes = try self.allocator.realloc(decl_symbol.symbol_indexes, export_count);
+
+                const existing_block = self.findAllocatedBlock(decl_symbol.vaddr);
+                if (code.items.len > existing_block.size_capacity) {
+                    const new_block = self.allocateDeclSymbol(code.items.len);
+                    decl_symbol.vaddr = new_block.vaddr;
+                    decl_symbol.file_offset = new_block.file_offset;
+                    decl_symbol.size = code.items.len;
+                }
+                break :blk decl_symbol;
+            } else {
+                const new_block = self.allocateDeclSymbol(code.items.len);
+
+                const decl_symbol = try self.allocator.create(DeclSymbol);
+                errdefer self.allocator.destroy(decl_symbol);
+
+                decl_symbol.* = .{
+                    .symbol_indexes = try self.allocator.alloc(usize, export_count),
+                    .vaddr = new_block.vaddr,
+                    .file_offset = new_block.file_offset,
+                    .size = code.items.len,
+                };
+                errdefer self.allocator.free(decl_symbol.symbol_indexes);
+
+                try self.decl_table.put(hash, decl_symbol);
+                break :blk decl_symbol;
+            }
+        };
+
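+        // Either way, `decl_symbol` now describes a file block with enough
+        // capacity for the new code, and `symbol_indexes` has one slot per
+        // export; slots at index >= valid_sym_index_len are not yet backed
+        // by entries in `symbols`.
+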
+        // Allocate new symbols.
+        {
+            var i: usize = valid_sym_index_len;
+            const old_len = self.symbols.items.len;
+            try self.symbols.resize(self.allocator, old_len + (decl_symbol.symbol_indexes.len - i));
+            while (i < decl_symbol.symbol_indexes.len) : (i += 1) {
+                decl_symbol.symbol_indexes[i] = old_len + i;
+            }
+        }
+
+        var export_node = decl_export_node;
+        var export_index: usize = 0;
+        while (export_node) |node| : ({
+            export_node = node.next;
+            export_index += 1;
+        }) {
+            if (node.data.section) |section_name| {
+                if (!mem.eql(u8, section_name, ".text")) {
+                    return ir.ErrorMsg{
+                        .byte_offset = 0,
+                        .msg = try std.fmt.allocPrint(err_msg_allocator, "Unimplemented: ExportOptions.section", .{}),
+                    };
+                }
+            }
+            const stb_bits = switch (node.data.linkage) {
+                .Internal => elf.STB_LOCAL,
+                .Strong => blk: {
+                    if (mem.eql(u8, node.data.name, "_start")) {
+                        self.entry_addr = decl_symbol.vaddr;
+                    }
+                    break :blk elf.STB_GLOBAL;
+                },
+                .Weak => elf.STB_WEAK,
+                .LinkOnce => {
+                    return ir.ErrorMsg{
+                        .byte_offset = 0,
+                        .msg = try std.fmt.allocPrint(err_msg_allocator, "Unimplemented: GlobalLinkage.LinkOnce", .{}),
+                    };
+                },
+            };
+            const stt_bits = switch (typed_value.ty.zigTypeTag()) {
+                .Fn => elf.STT_FUNC,
+                else => elf.STT_OBJECT,
+            };
+            const sym_index = decl_symbol.symbol_indexes[export_index];
+            const name = blk: {
+                if (export_index < valid_sym_index_len) {
+                    const name_stroff = self.symbols.items[sym_index].st_name;
+                    const existing_name = self.getString(name_stroff);
+                    if (mem.eql(u8, existing_name, node.data.name)) {
+                        break :blk name_stroff;
+                    }
+                }
+                break :blk try self.makeString(node.data.name);
+            };
+            self.symbols.items[sym_index] = .{
+                .st_name = name,
+                .st_info = (stb_bits << 4) | stt_bits,
+                .st_other = 0,
+                .st_shndx = self.text_section_index.?,
+                .st_value = decl_symbol.vaddr,
+                .st_size = code.items.len,
+            };
+        }
+
+        try self.file.pwriteAll(code.items, decl_symbol.file_offset);
+        return null;
+    }
+
+    fn writeProgHeader(self: *ElfFile, index: usize) !void {
+        const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
         const offset = self.program_headers.items[index].p_offset;
-        switch (self.module.target.cpu.arch.ptrBitWidth()) {
+        switch (self.options.target.cpu.arch.ptrBitWidth()) {
             32 => {
                 var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])};
                 if (foreign_endian) {
@@ -663,10 +759,10 @@ const Update = struct {
             }
         }
 
-    fn writeSectHeader(self: *Update, index: usize) !void {
-        const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+    fn writeSectHeader(self: *ElfFile, index: usize) !void {
+        const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
         const offset = self.sections.items[index].sh_offset;
-        switch (self.module.target.cpu.arch.ptrBitWidth()) {
+        switch (self.options.target.cpu.arch.ptrBitWidth()) {
             32 => {
                 var shdr: [1]elf.Elf32_Shdr = undefined;
                 shdr[0] = sectHeaderTo32(self.sections.items[index]);
@@ -686,13 +782,8 @@ const Update = struct {
             }
         }
 
-    fn writeSymbols(self: *Update) !void {
-        const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) {
-            32 => .p32,
-            64 => .p64,
-            else => return error.UnsupportedArchitecture,
-        };
-        const small_ptr = ptr_width == .p32;
+    fn writeSymbols(self: *ElfFile) !void {
+        const small_ptr = self.ptr_width == .p32;
         const syms_sect = &self.sections.items[self.symtab_section_index.?];
         const sym_align: u16
= if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym);
         const sym_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym);
@@ -708,8 +799,8 @@ const Update = struct {
         syms_sect.sh_size = needed_size;
         syms_sect.sh_info = @intCast(u32, self.symbols.items.len);
-        const allocator = self.symbols.allocator;
-        const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
-        switch (ptr_width) {
+        const allocator = self.allocator;
+        const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+        switch (self.ptr_width) {
             .p32 => {
                 const buf = try allocator.alloc(elf.Elf32_Sym, self.symbols.items.len);
                 defer allocator.free(buf);
@@ -754,13 +845,13 @@
 
 /// Truncates the existing file contents and overwrites the contents.
 /// Returns an error if `file` is not already open with +read +write +seek abilities.
-pub fn writeFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result {
-    switch (module.output_mode) {
+pub fn createElfFile(allocator: *Allocator, file: fs.File, options: Options) !ElfFile {
+    switch (options.output_mode) {
         .Exe => {},
         .Obj => {},
         .Lib => return error.TODOImplementWritingLibFiles,
     }
-    switch (module.object_format) {
+    switch (options.object_format) {
         .unknown => unreachable, // TODO remove this tag from the enum
         .coff => return error.TODOImplementWritingCOFF,
         .elf => {},
@@ -768,38 +859,79 @@ pub fn writeFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Resul
         .wasm => return error.TODOImplementWritingWasmObjects,
     }
 
-    var update = Update{
+    var self: ElfFile = .{
+        .allocator = allocator,
         .file = file,
-        .module = &module,
-        .sections = std.ArrayList(elf.Elf64_Shdr).init(allocator),
-        .shdr_table_offset = null,
-        .program_headers = std.ArrayList(elf.Elf64_Phdr).init(allocator),
-        .phdr_table_offset = null,
-        .phdr_load_re_index = null,
-        .entry_addr = null,
-        .shstrtab = std.ArrayList(u8).init(allocator),
-        .shstrtab_index = null,
-        .text_section_index = null,
-        .symtab_section_index = null,
-
-        .symbols = std.ArrayList(elf.Elf64_Sym).init(allocator),
-
-        .errors = std.ArrayList(ErrorMsg).init(allocator),
+        .options = options,
+        .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) {
+            32 => .p32,
+            64 => .p64,
+            else => return error.UnsupportedELFArchitecture,
+        },
+        .symtab_dirty = true,
+        .shdr_table_dirty = true,
     };
-    defer update.deinit();
+    errdefer self.deinit();
 
-    try update.perform();
-    return Result{
-        .errors = update.errors.toOwnedSlice(),
-    };
+    // Index 0 is always a null symbol.
+    try self.symbols.append(allocator, .{
+        .st_name = 0,
+        .st_info = 0,
+        .st_other = 0,
+        .st_shndx = 0,
+        .st_value = 0,
+        .st_size = 0,
+    });
+
+    // There must always be a null section in index 0
+    try self.sections.append(allocator, .{
+        .sh_name = 0,
+        .sh_type = elf.SHT_NULL,
+        .sh_flags = 0,
+        .sh_addr = 0,
+        .sh_offset = 0,
+        .sh_size = 0,
+        .sh_link = 0,
+        .sh_info = 0,
+        .sh_addralign = 0,
+        .sh_entsize = 0,
+    });
+
+    try self.populateMissingMetadata();
+
+    return self;
 }
 
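+// The incremental contract, as sketched in this commit: openBinFileInner
+// inspects an existing output file and returns error.IncrFailed whenever an
+// in-place update cannot be done safely, in which case openBinFile falls
+// back to createElfFile and the output is written from scratch.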
 /// Returns error.IncrFailed if incremental update could not be performed.
-fn updateFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !Result {
-    //var ehdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined;
+fn openBinFileInner(allocator: *Allocator, file: fs.File, options: Options) !ElfFile {
+    switch (options.output_mode) {
+        .Exe => {},
+        .Obj => {},
+        .Lib => return error.IncrFailed,
+    }
+    switch (options.object_format) {
+        .unknown => unreachable, // TODO remove this tag from the enum
+        .coff => return error.IncrFailed,
+        .elf => {},
+        .macho => return error.IncrFailed,
+        .wasm => return error.IncrFailed,
+    }
+    var self: ElfFile = .{
+        .allocator = allocator,
+        .file = file,
+        .options = options,
+        .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) {
+            32 => .p32,
+            64 => .p64,
+            else => return error.UnsupportedELFArchitecture,
+        },
+    };
+    errdefer self.deinit();
 
-    // TODO implement incremental linking
+    // TODO implement reading the elf file
     return error.IncrFailed;
+    //try self.populateMissingMetadata();
+    //return self;
 }
 
 /// Saturating multiplication
@@ -840,14 +972,14 @@ fn sectHeaderTo32(shdr: elf.Elf64_Shdr) elf.Elf32_Shdr {
     };
 }
 
-fn determineMode(module: ir.Module) fs.File.Mode {
+fn determineMode(options: Options) fs.File.Mode {
     // On common systems with a 0o022 umask, 0o777 will still result in a file created
     // with 0o755 permissions, but it works appropriately if the system is configured
     // more leniently. As another data point, C's fopen seems to open files with the
     // 666 mode.
     const executable_mode = if (std.Target.current.os.tag == .windows) 0 else 0o777;
-    switch (module.output_mode) {
-        .Lib => return switch (module.link_mode) {
+    switch (options.output_mode) {
+        .Lib => return switch (options.link_mode) {
            .Dynamic => executable_mode,
            .Static => fs.File.default_mode,
        },
diff --git a/src-self-hosted/package.zig b/src-self-hosted/package.zig
deleted file mode 100644
index 3111555878..0000000000
--- a/src-self-hosted/package.zig
+++ /dev/null
@@ -1,31 +0,0 @@
-const std = @import("std");
-const mem = std.mem;
-const assert = std.debug.assert;
-const ArrayListSentineled = std.ArrayListSentineled;
-
-pub const Package = struct {
-    root_src_dir: ArrayListSentineled(u8, 0),
-    root_src_path: ArrayListSentineled(u8, 0),
-
-    /// relative to root_src_dir
-    table: Table,
-
-    pub const Table = std.StringHashMap(*Package);
-
-    /// makes internal copies of root_src_dir and root_src_path
-    /// allocator should be an arena allocator because Package never frees anything
-    pub fn create(allocator: *mem.Allocator, root_src_dir: []const u8, root_src_path: []const u8) !*Package {
-        const ptr = try allocator.create(Package);
-        ptr.* = Package{
-            .root_src_dir = try ArrayListSentineled(u8, 0).init(allocator, root_src_dir),
-            .root_src_path = try ArrayListSentineled(u8, 0).init(allocator, root_src_path),
-            .table = Table.init(allocator),
-        };
-        return ptr;
-    }
-
-    pub fn add(self: *Package, name: []const u8, package: *Package) !void {
-        const entry = try self.table.put(try mem.dupe(self.table.allocator, u8, name), package);
-        assert(entry == null);
-    }
-};
diff --git a/src-self-hosted/scope.zig b/src-self-hosted/scope.zig
deleted file mode 100644
index c294bf8b7c..0000000000
--- a/src-self-hosted/scope.zig
+++ /dev/null
@@ -1,418 +0,0 @@
-const std = @import("std");
-const Allocator = mem.Allocator;
-const Decl = @import("decl.zig").Decl;
-const Compilation = @import("compilation.zig").Compilation;
-const mem = std.mem;
-const ast = std.zig.ast;
-const Value = @import("value.zig").Value;
-const Type = @import("type.zig").Type;
-const ir
= @import("ir.zig"); -const Span = @import("errmsg.zig").Span; -const assert = std.debug.assert; -const event = std.event; -const llvm = @import("llvm.zig"); - -pub const Scope = struct { - id: Id, - parent: ?*Scope, - ref_count: std.atomic.Int(usize), - - /// Thread-safe - pub fn ref(base: *Scope) void { - _ = base.ref_count.incr(); - } - - /// Thread-safe - pub fn deref(base: *Scope, comp: *Compilation) void { - if (base.ref_count.decr() == 1) { - if (base.parent) |parent| parent.deref(comp); - switch (base.id) { - .Root => @fieldParentPtr(Root, "base", base).destroy(comp), - .Decls => @fieldParentPtr(Decls, "base", base).destroy(comp), - .Block => @fieldParentPtr(Block, "base", base).destroy(comp), - .FnDef => @fieldParentPtr(FnDef, "base", base).destroy(comp), - .CompTime => @fieldParentPtr(CompTime, "base", base).destroy(comp), - .Defer => @fieldParentPtr(Defer, "base", base).destroy(comp), - .DeferExpr => @fieldParentPtr(DeferExpr, "base", base).destroy(comp), - .Var => @fieldParentPtr(Var, "base", base).destroy(comp), - .AstTree => @fieldParentPtr(AstTree, "base", base).destroy(comp), - } - } - } - - pub fn findRoot(base: *Scope) *Root { - var scope = base; - while (scope.parent) |parent| { - scope = parent; - } - assert(scope.id == .Root); - return @fieldParentPtr(Root, "base", scope); - } - - pub fn findFnDef(base: *Scope) ?*FnDef { - var scope = base; - while (true) { - switch (scope.id) { - .FnDef => return @fieldParentPtr(FnDef, "base", scope), - .Root, .Decls => return null, - - .Block, - .Defer, - .DeferExpr, - .CompTime, - .Var, - => scope = scope.parent.?, - - .AstTree => unreachable, - } - } - } - - pub fn findDeferExpr(base: *Scope) ?*DeferExpr { - var scope = base; - while (true) { - switch (scope.id) { - .DeferExpr => return @fieldParentPtr(DeferExpr, "base", scope), - - .FnDef, - .Decls, - => return null, - - .Block, - .Defer, - .CompTime, - .Root, - .Var, - => scope = scope.parent orelse return null, - - .AstTree => unreachable, - } - } - } - - fn init(base: *Scope, id: Id, parent: *Scope) void { - base.* = Scope{ - .id = id, - .parent = parent, - .ref_count = std.atomic.Int(usize).init(1), - }; - parent.ref(); - } - - pub const Id = enum { - Root, - AstTree, - Decls, - Block, - FnDef, - CompTime, - Defer, - DeferExpr, - Var, - }; - - pub const Root = struct { - base: Scope, - realpath: []const u8, - decls: *Decls, - - /// Creates a Root scope with 1 reference - /// Takes ownership of realpath - pub fn create(comp: *Compilation, realpath: []u8) !*Root { - const self = try comp.gpa().create(Root); - self.* = Root{ - .base = Scope{ - .id = .Root, - .parent = null, - .ref_count = std.atomic.Int(usize).init(1), - }, - .realpath = realpath, - .decls = undefined, - }; - errdefer comp.gpa().destroy(self); - self.decls = try Decls.create(comp, &self.base); - return self; - } - - pub fn destroy(self: *Root, comp: *Compilation) void { - // TODO comp.fs_watch.removeFile(self.realpath); - self.decls.base.deref(comp); - comp.gpa().free(self.realpath); - comp.gpa().destroy(self); - } - }; - - pub const AstTree = struct { - base: Scope, - tree: *ast.Tree, - - /// Creates a scope with 1 reference - /// Takes ownership of tree, will deinit and destroy when done. 
- pub fn create(comp: *Compilation, tree: *ast.Tree, root_scope: *Root) !*AstTree { - const self = try comp.gpa().create(AstTree); - self.* = AstTree{ - .base = undefined, - .tree = tree, - }; - self.base.init(.AstTree, &root_scope.base); - - return self; - } - - pub fn destroy(self: *AstTree, comp: *Compilation) void { - comp.gpa().free(self.tree.source); - self.tree.deinit(); - comp.gpa().destroy(self); - } - - pub fn root(self: *AstTree) *Root { - return self.base.findRoot(); - } - }; - - pub const Decls = struct { - base: Scope, - - /// This table remains Write Locked when the names are incomplete or possibly outdated. - /// So if a reader manages to grab a lock, it can be sure that the set of names is complete - /// and correct. - table: event.RwLocked(Decl.Table), - - /// Creates a Decls scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope) !*Decls { - const self = try comp.gpa().create(Decls); - self.* = Decls{ - .base = undefined, - .table = event.RwLocked(Decl.Table).init(Decl.Table.init(comp.gpa())), - }; - self.base.init(.Decls, parent); - return self; - } - - pub fn destroy(self: *Decls, comp: *Compilation) void { - self.table.deinit(); - comp.gpa().destroy(self); - } - }; - - pub const Block = struct { - base: Scope, - incoming_values: std.ArrayList(*ir.Inst), - incoming_blocks: std.ArrayList(*ir.BasicBlock), - end_block: *ir.BasicBlock, - is_comptime: *ir.Inst, - - safety: Safety, - - const Safety = union(enum) { - Auto, - Manual: Manual, - - const Manual = struct { - /// the source span that disabled the safety value - span: Span, - - /// whether safety is enabled - enabled: bool, - }; - - fn get(self: Safety, comp: *Compilation) bool { - return switch (self) { - .Auto => switch (comp.build_mode) { - .Debug, - .ReleaseSafe, - => true, - .ReleaseFast, - .ReleaseSmall, - => false, - }, - .Manual => |man| man.enabled, - }; - } - }; - - /// Creates a Block scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope) !*Block { - const self = try comp.gpa().create(Block); - self.* = Block{ - .base = undefined, - .incoming_values = undefined, - .incoming_blocks = undefined, - .end_block = undefined, - .is_comptime = undefined, - .safety = Safety.Auto, - }; - self.base.init(.Block, parent); - return self; - } - - pub fn destroy(self: *Block, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const FnDef = struct { - base: Scope, - - /// This reference is not counted so that the scope can get destroyed with the function - fn_val: ?*Value.Fn, - - /// Creates a FnDef scope with 1 reference - /// Must set the fn_val later - pub fn create(comp: *Compilation, parent: *Scope) !*FnDef { - const self = try comp.gpa().create(FnDef); - self.* = FnDef{ - .base = undefined, - .fn_val = null, - }; - self.base.init(.FnDef, parent); - return self; - } - - pub fn destroy(self: *FnDef, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const CompTime = struct { - base: Scope, - - /// Creates a CompTime scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope) !*CompTime { - const self = try comp.gpa().create(CompTime); - self.* = CompTime{ .base = undefined }; - self.base.init(.CompTime, parent); - return self; - } - - pub fn destroy(self: *CompTime, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const Defer = struct { - base: Scope, - defer_expr_scope: *DeferExpr, - kind: Kind, - - pub const Kind = enum { - ScopeExit, - ErrorExit, - }; - - /// Creates a Defer scope with 1 reference 
- pub fn create( - comp: *Compilation, - parent: *Scope, - kind: Kind, - defer_expr_scope: *DeferExpr, - ) !*Defer { - const self = try comp.gpa().create(Defer); - self.* = Defer{ - .base = undefined, - .defer_expr_scope = defer_expr_scope, - .kind = kind, - }; - self.base.init(.Defer, parent); - defer_expr_scope.base.ref(); - return self; - } - - pub fn destroy(self: *Defer, comp: *Compilation) void { - self.defer_expr_scope.base.deref(comp); - comp.gpa().destroy(self); - } - }; - - pub const DeferExpr = struct { - base: Scope, - expr_node: *ast.Node, - reported_err: bool, - - /// Creates a DeferExpr scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope, expr_node: *ast.Node) !*DeferExpr { - const self = try comp.gpa().create(DeferExpr); - self.* = DeferExpr{ - .base = undefined, - .expr_node = expr_node, - .reported_err = false, - }; - self.base.init(.DeferExpr, parent); - return self; - } - - pub fn destroy(self: *DeferExpr, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const Var = struct { - base: Scope, - name: []const u8, - src_node: *ast.Node, - data: Data, - - pub const Data = union(enum) { - Param: Param, - Const: *Value, - }; - - pub const Param = struct { - index: usize, - typ: *Type, - llvm_value: *llvm.Value, - }; - - pub fn createParam( - comp: *Compilation, - parent: *Scope, - name: []const u8, - src_node: *ast.Node, - param_index: usize, - param_type: *Type, - ) !*Var { - const self = try create(comp, parent, name, src_node); - self.data = Data{ - .Param = Param{ - .index = param_index, - .typ = param_type, - .llvm_value = undefined, - }, - }; - return self; - } - - pub fn createConst( - comp: *Compilation, - parent: *Scope, - name: []const u8, - src_node: *ast.Node, - value: *Value, - ) !*Var { - const self = try create(comp, parent, name, src_node); - self.data = Data{ .Const = value }; - value.ref(); - return self; - } - - fn create(comp: *Compilation, parent: *Scope, name: []const u8, src_node: *ast.Node) !*Var { - const self = try comp.gpa().create(Var); - self.* = Var{ - .base = undefined, - .name = name, - .src_node = src_node, - .data = undefined, - }; - self.base.init(.Var, parent); - return self; - } - - pub fn destroy(self: *Var, comp: *Compilation) void { - switch (self.data) { - .Param => {}, - .Const => |value| value.deref(comp), - } - comp.gpa().destroy(self); - } - }; -}; diff --git a/src-self-hosted/test.zig b/src-self-hosted/test.zig index 8186f1f4d8..bac016e1a4 100644 --- a/src-self-hosted/test.zig +++ b/src-self-hosted/test.zig @@ -3,15 +3,14 @@ const link = @import("link.zig"); const ir = @import("ir.zig"); const Allocator = std.mem.Allocator; -var global_ctx: TestContext = undefined; - test "self-hosted" { - try global_ctx.init(); - defer global_ctx.deinit(); + var ctx: TestContext = undefined; + try ctx.init(); + defer ctx.deinit(); - try @import("stage2_tests").addCases(&global_ctx); + try @import("stage2_tests").addCases(&ctx); - try global_ctx.run(); + try ctx.run(); } pub const TestContext = struct { diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 25f726a680..8ace5a096f 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -52,6 +52,7 @@ pub const Type = extern union { .comptime_float => return .ComptimeFloat, .noreturn => return .NoReturn, + .fn_noreturn_no_args => return .Fn, .fn_naked_noreturn_no_args => return .Fn, .fn_ccc_void_no_args => return .Fn, @@ -184,6 +185,7 @@ pub const Type = extern union { => return out_stream.writeAll(@tagName(t)), 
.const_slice_u8 => return out_stream.writeAll("[]const u8"), + .fn_noreturn_no_args => return out_stream.writeAll("fn() noreturn"), .fn_naked_noreturn_no_args => return out_stream.writeAll("fn() callconv(.Naked) noreturn"), .fn_ccc_void_no_args => return out_stream.writeAll("fn() callconv(.C) void"), .single_const_pointer_to_comptime_int => return out_stream.writeAll("*const comptime_int"), @@ -244,6 +246,7 @@ pub const Type = extern union { .comptime_int => return Value.initTag(.comptime_int_type), .comptime_float => return Value.initTag(.comptime_float_type), .noreturn => return Value.initTag(.noreturn_type), + .fn_noreturn_no_args => return Value.initTag(.fn_noreturn_no_args_type), .fn_naked_noreturn_no_args => return Value.initTag(.fn_naked_noreturn_no_args_type), .fn_ccc_void_no_args => return Value.initTag(.fn_ccc_void_no_args_type), .single_const_pointer_to_comptime_int => return Value.initTag(.single_const_pointer_to_comptime_int_type), @@ -286,6 +289,7 @@ pub const Type = extern union { .array, .array_u8_sentinel_0, .const_slice_u8, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -329,6 +333,7 @@ pub const Type = extern union { .array_u8_sentinel_0, .single_const_pointer, .single_const_pointer_to_comptime_int, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -369,6 +374,7 @@ pub const Type = extern union { .noreturn, .array, .array_u8_sentinel_0, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -410,6 +416,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -451,6 +458,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .single_const_pointer, @@ -481,6 +489,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .array, @@ -524,6 +533,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .array, @@ -579,6 +589,7 @@ pub const Type = extern union { /// Asserts the type is a function. pub fn fnParamLen(self: Type) usize { return switch (self.tag()) { + .fn_noreturn_no_args => 0, .fn_naked_noreturn_no_args => 0, .fn_ccc_void_no_args => 0, @@ -622,6 +633,7 @@ pub const Type = extern union { /// given by `fnParamLen`. pub fn fnParamTypes(self: Type, types: []Type) void { switch (self.tag()) { + .fn_noreturn_no_args => return, .fn_naked_noreturn_no_args => return, .fn_ccc_void_no_args => return, @@ -664,6 +676,7 @@ pub const Type = extern union { /// Asserts the type is a function. pub fn fnReturnType(self: Type) Type { return switch (self.tag()) { + .fn_noreturn_no_args => Type.initTag(.noreturn), .fn_naked_noreturn_no_args => Type.initTag(.noreturn), .fn_ccc_void_no_args => Type.initTag(.void), @@ -706,6 +719,7 @@ pub const Type = extern union { /// Asserts the type is a function. pub fn fnCallingConvention(self: Type) std.builtin.CallingConvention { return switch (self.tag()) { + .fn_noreturn_no_args => .Unspecified, .fn_naked_noreturn_no_args => .Naked, .fn_ccc_void_no_args => .C, @@ -745,6 +759,49 @@ pub const Type = extern union { }; } + /// Asserts the type is a function. 
+ pub fn fnIsVarArgs(self: Type) bool { + return switch (self.tag()) { + .fn_noreturn_no_args => false, + .fn_naked_noreturn_no_args => false, + .fn_ccc_void_no_args => false, + + .f16, + .f32, + .f64, + .f128, + .c_longdouble, + .c_void, + .bool, + .void, + .type, + .anyerror, + .comptime_int, + .comptime_float, + .noreturn, + .array, + .single_const_pointer, + .single_const_pointer_to_comptime_int, + .array_u8_sentinel_0, + .const_slice_u8, + .u8, + .i8, + .usize, + .isize, + .c_short, + .c_ushort, + .c_int, + .c_uint, + .c_long, + .c_ulong, + .c_longlong, + .c_ulonglong, + .int_unsigned, + .int_signed, + => unreachable, + }; + } + pub fn isNumeric(self: Type) bool { return switch (self.tag()) { .f16, @@ -776,6 +833,7 @@ pub const Type = extern union { .type, .anyerror, .noreturn, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .array, @@ -812,6 +870,7 @@ pub const Type = extern union { .bool, .type, .anyerror, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .single_const_pointer_to_comptime_int, @@ -865,6 +924,7 @@ pub const Type = extern union { .bool, .type, .anyerror, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .single_const_pointer_to_comptime_int, @@ -902,11 +962,11 @@ pub const Type = extern union { c_longlong, c_ulonglong, c_longdouble, - c_void, f16, f32, f64, f128, + c_void, bool, void, type, @@ -914,6 +974,7 @@ pub const Type = extern union { comptime_int, comptime_float, noreturn, + fn_noreturn_no_args, fn_naked_noreturn_no_args, fn_ccc_void_no_args, single_const_pointer_to_comptime_int, diff --git a/src-self-hosted/util.zig b/src-self-hosted/util.zig deleted file mode 100644 index 6585fd7c6f..0000000000 --- a/src-self-hosted/util.zig +++ /dev/null @@ -1,47 +0,0 @@ -const std = @import("std"); -const Target = std.Target; -const llvm = @import("llvm.zig"); - -pub fn getDarwinArchString(self: Target) [:0]const u8 { - switch (self.cpu.arch) { - .aarch64 => return "arm64", - .thumb, - .arm, - => return "arm", - .powerpc => return "ppc", - .powerpc64 => return "ppc64", - .powerpc64le => return "ppc64le", - // @tagName should be able to return sentinel terminated slice - else => @panic("TODO https://github.com/ziglang/zig/issues/3779"), //return @tagName(arch), - } -} - -pub fn llvmTargetFromTriple(triple: [:0]const u8) !*llvm.Target { - var result: *llvm.Target = undefined; - var err_msg: [*:0]u8 = undefined; - if (llvm.GetTargetFromTriple(triple, &result, &err_msg) != 0) { - std.debug.warn("triple: {s} error: {s}\n", .{ triple, err_msg }); - return error.UnsupportedTarget; - } - return result; -} - -pub fn initializeAllTargets() void { - llvm.InitializeAllTargets(); - llvm.InitializeAllTargetInfos(); - llvm.InitializeAllTargetMCs(); - llvm.InitializeAllAsmPrinters(); - llvm.InitializeAllAsmParsers(); -} - -pub fn getLLVMTriple(allocator: *std.mem.Allocator, target: std.Target) ![:0]u8 { - var result = try std.ArrayListSentineled(u8, 0).initSize(allocator, 0); - defer result.deinit(); - - try result.outStream().print( - "{}-unknown-{}-{}", - .{ @tagName(target.cpu.arch), @tagName(target.os.tag), @tagName(target.abi) }, - ); - - return result.toOwnedSlice(); -} diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 3d04e6e813..2a2a8b54bc 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -6,6 +6,7 @@ const BigIntConst = std.math.big.int.Const; const BigIntMutable = std.math.big.int.Mutable; const Target = std.Target; const Allocator = std.mem.Allocator; +const 
ir = @import("ir.zig");
 
 /// This is the raw data, with no bookkeeping, no memory awareness,
 /// no de-duplication, and no type system awareness.
@@ -45,6 +46,7 @@ pub const Value = extern union {
         comptime_int_type,
         comptime_float_type,
         noreturn_type,
+        fn_noreturn_no_args_type,
         fn_naked_noreturn_no_args_type,
         fn_ccc_void_no_args_type,
         single_const_pointer_to_comptime_int_type,
@@ -64,8 +66,8 @@ pub const Value = extern union {
         int_big_positive,
         int_big_negative,
         function,
-        ref,
-        ref_val,
+        decl_ref,
+        elem_ptr,
         bytes,
         repeated, // the value is a value repeated some number of times
 
@@ -136,6 +138,7 @@ pub const Value = extern union {
             .comptime_int_type => return out_stream.writeAll("comptime_int"),
             .comptime_float_type => return out_stream.writeAll("comptime_float"),
             .noreturn_type => return out_stream.writeAll("noreturn"),
+            .fn_noreturn_no_args_type => return out_stream.writeAll("fn() noreturn"),
             .fn_naked_noreturn_no_args_type => return out_stream.writeAll("fn() callconv(.Naked) noreturn"),
             .fn_ccc_void_no_args_type => return out_stream.writeAll("fn() callconv(.C) void"),
             .single_const_pointer_to_comptime_int_type => return out_stream.writeAll("*const comptime_int"),
@@ -153,11 +156,11 @@ pub const Value = extern union {
             .int_big_positive => return out_stream.print("{}", .{val.cast(Payload.IntBigPositive).?.asBigInt()}),
             .int_big_negative => return out_stream.print("{}", .{val.cast(Payload.IntBigNegative).?.asBigInt()}),
             .function => return out_stream.writeAll("(function)"),
-            .ref => return out_stream.writeAll("(ref)"),
-            .ref_val => {
-                try out_stream.writeAll("*const ");
-                val = val.cast(Payload.RefVal).?.val;
-                continue;
+            .decl_ref => return out_stream.writeAll("(decl ref)"),
+            .elem_ptr => {
+                const elem_ptr = val.cast(Payload.ElemPtr).?;
+                try out_stream.print("&[{}] ", .{elem_ptr.index});
+                val = elem_ptr.array_ptr;
+                continue;
             },
             .bytes => return std.zig.renderStringLiteral(self.cast(Payload.Bytes).?.data, out_stream),
             .repeated => {
@@ -181,31 +184,32 @@ pub const Value = extern union {
         return switch (self.tag()) {
             .ty => self.cast(Payload.Ty).?.ty,
 
-            .u8_type => Type.initTag(.@"u8"),
-            .i8_type => Type.initTag(.@"i8"),
-            .isize_type => Type.initTag(.@"isize"),
-            .usize_type => Type.initTag(.@"usize"),
-            .c_short_type => Type.initTag(.@"c_short"),
-            .c_ushort_type => Type.initTag(.@"c_ushort"),
-            .c_int_type => Type.initTag(.@"c_int"),
-            .c_uint_type => Type.initTag(.@"c_uint"),
-            .c_long_type => Type.initTag(.@"c_long"),
-            .c_ulong_type => Type.initTag(.@"c_ulong"),
-            .c_longlong_type => Type.initTag(.@"c_longlong"),
-            .c_ulonglong_type => Type.initTag(.@"c_ulonglong"),
-            .c_longdouble_type => Type.initTag(.@"c_longdouble"),
-            .f16_type => Type.initTag(.@"f16"),
-            .f32_type => Type.initTag(.@"f32"),
-            .f64_type => Type.initTag(.@"f64"),
-            .f128_type => Type.initTag(.@"f128"),
-            .c_void_type => Type.initTag(.@"c_void"),
-            .bool_type => Type.initTag(.@"bool"),
-            .void_type => Type.initTag(.@"void"),
-            .type_type => Type.initTag(.@"type"),
-            .anyerror_type => Type.initTag(.@"anyerror"),
-            .comptime_int_type => Type.initTag(.@"comptime_int"),
-            .comptime_float_type => Type.initTag(.@"comptime_float"),
-            .noreturn_type => Type.initTag(.@"noreturn"),
+            .u8_type => Type.initTag(.u8),
+            .i8_type => Type.initTag(.i8),
+            .isize_type => Type.initTag(.isize),
+            .usize_type => Type.initTag(.usize),
+            .c_short_type => Type.initTag(.c_short),
+            .c_ushort_type => Type.initTag(.c_ushort),
+            .c_int_type => Type.initTag(.c_int),
+            .c_uint_type => Type.initTag(.c_uint),
+            .c_long_type =>
Type.initTag(.c_long), + .c_ulong_type => Type.initTag(.c_ulong), + .c_longlong_type => Type.initTag(.c_longlong), + .c_ulonglong_type => Type.initTag(.c_ulonglong), + .c_longdouble_type => Type.initTag(.c_longdouble), + .f16_type => Type.initTag(.f16), + .f32_type => Type.initTag(.f32), + .f64_type => Type.initTag(.f64), + .f128_type => Type.initTag(.f128), + .c_void_type => Type.initTag(.c_void), + .bool_type => Type.initTag(.bool), + .void_type => Type.initTag(.void), + .type_type => Type.initTag(.type), + .anyerror_type => Type.initTag(.anyerror), + .comptime_int_type => Type.initTag(.comptime_int), + .comptime_float_type => Type.initTag(.comptime_float), + .noreturn_type => Type.initTag(.noreturn), + .fn_noreturn_no_args_type => Type.initTag(.fn_noreturn_no_args), .fn_naked_noreturn_no_args_type => Type.initTag(.fn_naked_noreturn_no_args), .fn_ccc_void_no_args_type => Type.initTag(.fn_ccc_void_no_args), .single_const_pointer_to_comptime_int_type => Type.initTag(.single_const_pointer_to_comptime_int), @@ -222,8 +226,8 @@ pub const Value = extern union { .int_big_positive, .int_big_negative, .function, - .ref, - .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, => unreachable, @@ -259,6 +263,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -267,8 +272,8 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, - .ref_val, + .decl_ref, + .elem_ptr, .bytes, .undef, .repeated, @@ -314,6 +319,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -322,8 +328,8 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, - .ref_val, + .decl_ref, + .elem_ptr, .bytes, .undef, .repeated, @@ -370,6 +376,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -378,8 +385,8 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, - .ref_val, + .decl_ref, + .elem_ptr, .bytes, .undef, .repeated, @@ -431,6 +438,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -439,8 +447,8 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, - .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, => unreachable, @@ -521,6 +529,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -529,8 +538,8 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, - .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, .undef, @@ -573,6 +582,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -581,8 +591,8 @@ pub const Value = extern union { 
.bool_false,
             .null_value,
             .function,
-            .ref,
-            .ref_val,
+            .decl_ref,
+            .elem_ptr,
             .bytes,
             .repeated,
             .undef,
@@ -636,7 +646,7 @@ pub const Value = extern union {
     }
 
     /// Asserts the value is a pointer and dereferences it.
-    pub fn pointerDeref(self: Value) Value {
+    pub fn pointerDeref(self: Value, module: *ir.Module) !Value {
         return switch (self.tag()) {
             .ty,
             .u8_type,
@@ -664,6 +674,7 @@ pub const Value = extern union {
             .comptime_int_type,
             .comptime_float_type,
             .noreturn_type,
+            .fn_noreturn_no_args_type,
             .fn_naked_noreturn_no_args_type,
             .fn_ccc_void_no_args_type,
             .single_const_pointer_to_comptime_int_type,
@@ -683,14 +694,21 @@ pub const Value = extern union {
             => unreachable,
 
             .the_one_possible_value => Value.initTag(.the_one_possible_value),
-            .ref => self.cast(Payload.Ref).?.cell.contents,
-            .ref_val => self.cast(Payload.RefVal).?.val,
+            .decl_ref => {
+                const index = self.cast(Payload.DeclRef).?.index;
+                return module.getDeclValue(index);
+            },
+            .elem_ptr => {
+                const elem_ptr = self.cast(Payload.ElemPtr).?;
+                const array_val = try elem_ptr.array_ptr.pointerDeref(module);
+                return array_val.elemValue(module.allocator, elem_ptr.index);
+            },
         };
     }
 
     /// Asserts the value is a single-item pointer to an array, or an array,
     /// or an unknown-length pointer, and returns the element value at the index.
-    pub fn elemValueAt(self: Value, allocator: *Allocator, index: usize) Allocator.Error!Value {
+    pub fn elemValue(self: Value, allocator: *Allocator, index: usize) Allocator.Error!Value {
         switch (self.tag()) {
             .ty,
             .u8_type,
@@ -718,6 +736,7 @@ pub const Value = extern union {
             .comptime_int_type,
             .comptime_float_type,
             .noreturn_type,
+            .fn_noreturn_no_args_type,
             .fn_naked_noreturn_no_args_type,
             .fn_ccc_void_no_args_type,
             .single_const_pointer_to_comptime_int_type,
@@ -733,13 +752,12 @@ pub const Value = extern union {
             .int_big_positive,
             .int_big_negative,
             .undef,
+            .elem_ptr,
+            .decl_ref,
             => unreachable,
 
-            .ref => @panic("TODO figure out how MemoryCell works"),
-            .ref_val => @panic("TODO figure out how MemoryCell works"),
-
             .bytes => {
-                const int_payload = try allocator.create(Value.Payload.Int_u64);
+                const int_payload = try allocator.create(Payload.Int_u64);
                 int_payload.* = .{ .int = self.cast(Payload.Bytes).?.data[index] };
                 return Value.initPayload(&int_payload.base);
             },
@@ -749,6 +767,17 @@ pub const Value = extern union {
         }
     }
 
+    /// Returns a pointer to the element value at the index.
+ pub fn elemPtr(self: Value, allocator: *Allocator, index: usize) !Value { + const payload = try allocator.create(Payload.ElemPtr); + if (self.cast(Payload.ElemPtr)) |elem_ptr| { + payload.* = .{ .array_ptr = elem_ptr.array_ptr, .index = elem_ptr.index + index }; + } else { + payload.* = .{ .array_ptr = self, .index = index }; + } + return Value.initPayload(&payload.base); + } + pub fn isUndef(self: Value) bool { return self.tag() == .undef; } @@ -783,6 +812,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -796,8 +826,8 @@ pub const Value = extern union { .int_i64, .int_big_positive, .int_big_negative, - .ref, - .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, => false, @@ -841,8 +871,7 @@ pub const Value = extern union { pub const Function = struct { base: Payload = Payload{ .tag = .function }, - /// Index into the `fns` array of the `ir.Module` - index: usize, + func: *ir.Module.Fn, }; pub const ArraySentinel0_u8_Type = struct { @@ -855,14 +884,17 @@ pub const Value = extern union { elem_type: *Type, }; - pub const Ref = struct { - base: Payload = Payload{ .tag = .ref }, - cell: *MemoryCell, + /// Represents a pointer to a decl, not the value of the decl. + pub const DeclRef = struct { + base: Payload = Payload{ .tag = .decl_ref }, + /// Index into the Module's decls list + index: usize, }; - pub const RefVal = struct { - base: Payload = Payload{ .tag = .ref_val }, - val: Value, + pub const ElemPtr = struct { + base: Payload = Payload{ .tag = .elem_ptr }, + array_ptr: Value, + index: usize, }; pub const Bytes = struct { @@ -890,29 +922,3 @@ pub const Value = extern union { limbs: [(@sizeOf(u64) / @sizeOf(std.math.big.Limb)) + 1]std.math.big.Limb, }; }; - -/// This is the heart of resource management of the Zig compiler. The Zig compiler uses -/// stop-the-world mark-and-sweep garbage collection during compilation to manage the resources -/// associated with evaluating compile-time code and semantic analysis. Each `MemoryCell` represents -/// a root. -pub const MemoryCell = struct { - parent: Parent, - contents: Value, - - pub const Parent = union(enum) { - none, - struct_field: struct { - struct_base: *MemoryCell, - field_index: usize, - }, - array_elem: struct { - array_base: *MemoryCell, - elem_index: usize, - }, - union_field: *MemoryCell, - err_union_code: *MemoryCell, - err_union_payload: *MemoryCell, - optional_payload: *MemoryCell, - optional_flag: *MemoryCell, - }; -}; diff --git a/src-self-hosted/visib.zig b/src-self-hosted/visib.zig deleted file mode 100644 index 3704600cca..0000000000 --- a/src-self-hosted/visib.zig +++ /dev/null @@ -1,4 +0,0 @@ -pub const Visib = enum { - Private, - Pub, -}; From 619159cf48e953ca65933391313a72c392007710 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 12 May 2020 01:02:48 -0400 Subject: [PATCH 03/31] self-hosted: rework the memory layout of ir.Module and related types * add TypedValue.Managed which represents a Type, a Value, and some kind of memory management strategy. 
* introduce an analysis queue
* flesh out how incremental compilation works with respect to exports
* ir.text.Module is only capable of one error message during parsing
* link.zig no longer has a decl table map and instead has structs that
  exist directly on ir.Module.Decl and ir.Module.Export
* implement primitive .text block allocation
* implement linker code for updating Decls and Exports
* implement null Type

Some supporting std lib changes:

* add std.ArrayList.appendSliceAssumeCapacity
* add std.fs.File.copyRange and copyRangeAll
* fix std.HashMap having modification safety on in ReleaseSmall builds
* add std.HashMap.putAssumeCapacityNoClobber
---
 lib/std/array_list.zig         |  17 +-
 lib/std/fs/file.zig            |  24 ++
 lib/std/hash_map.zig           |   6 +-
 src-self-hosted/TypedValue.zig |  23 ++
 src-self-hosted/ir.zig         | 454 +++++++++++++++++++++++----------
 src-self-hosted/ir/text.zig    |  16 +-
 src-self-hosted/link.zig       | 360 ++++++++++++++++----------
 src-self-hosted/type.zig       |  23 +-
 src-self-hosted/value.zig      |  14 +-
 9 files changed, 656 insertions(+), 281 deletions(-)
 create mode 100644 src-self-hosted/TypedValue.zig

diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig
index a47b7cde41..d38bcab1f4 100644
--- a/lib/std/array_list.zig
+++ b/lib/std/array_list.zig
@@ -149,10 +149,15 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type {
         /// Append the slice of items to the list. Allocates more
         /// memory as necessary.
         pub fn appendSlice(self: *Self, items: SliceConst) !void {
+            try self.ensureCapacity(self.items.len + items.len);
+            self.appendSliceAssumeCapacity(items);
+        }
+
+        /// Append the slice of items to the list, asserting the capacity is already
+        /// enough to store the new items.
+        pub fn appendSliceAssumeCapacity(self: *Self, items: SliceConst) void {
             const oldlen = self.items.len;
             const newlen = self.items.len + items.len;
-
-            try self.ensureCapacity(newlen);
             self.items.len = newlen;
             mem.copy(T, self.items[oldlen..], items);
         }
@@ -378,10 +383,16 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ
         /// Append the slice of items to the list. Allocates more
         /// memory as necessary.
         pub fn appendSlice(self: *Self, allocator: *Allocator, items: SliceConst) !void {
+            try self.ensureCapacity(allocator, self.items.len + items.len);
+            self.appendSliceAssumeCapacity(items);
+        }
+
+        /// Append the slice of items to the list, asserting the capacity is enough
+        /// to store the new items.
+        pub fn appendSliceAssumeCapacity(self: *Self, items: SliceConst) void {
             const oldlen = self.items.len;
             const newlen = self.items.len + items.len;
 
-            try self.ensureCapacity(allocator, newlen);
             self.items.len = newlen;
             mem.copy(T, self.items[oldlen..], items);
         }
diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig
index b7c575a04a..a33d0d8e3e 100644
--- a/lib/std/fs/file.zig
+++ b/lib/std/fs/file.zig
@@ -527,6 +527,30 @@ pub const File = struct {
         }
     }
 
+    pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) PWriteError!usize {
+        // TODO take advantage of copy_file_range OS APIs
+        var buf: [8 * 4096]u8 = undefined;
+        const adjusted_count = math.min(buf.len, len);
+        const amt_read = try in.pread(buf[0..adjusted_count], in_offset);
+        if (amt_read == 0) return 0;
+        return out.pwrite(buf[0..amt_read], out_offset);
+    }
+
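+    // Example (a sketch; assumes `src` and `dst` are Files opened with the
+    // needed read/write permissions):
+    //
+    //     const copied = try src.copyRangeAll(0, dst, 0, n);
+    //     // copied < n only if `src` ended before `n` bytes were read.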
diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig
index b7c575a04a..a33d0d8e3e 100644
--- a/lib/std/fs/file.zig
+++ b/lib/std/fs/file.zig
@@ -527,6 +527,30 @@ pub const File = struct {
         }
     }
 
+    pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) PWriteError!usize {
+        // TODO take advantage of copy_file_range OS APIs
+        var buf: [8 * 4096]u8 = undefined;
+        const adjusted_count = math.min(buf.len, len);
+        const amt_read = try in.pread(buf[0..adjusted_count], in_offset);
+        if (amt_read == 0) return 0;
+        return out.pwrite(buf[0..amt_read], out_offset);
+    }
+
+    /// Returns the number of bytes copied. If the number of bytes copied is smaller than `len`,
+    /// it means the `in` file reached the end. Reaching the end of a file is not an error condition.
+    pub fn copyRangeAll(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) PWriteError!usize {
+        var total_bytes_copied: usize = 0;
+        var in_off = in_offset;
+        var out_off = out_offset;
+        while (total_bytes_copied < len) {
+            const amt_copied = try copyRange(in, in_off, out, out_off, len - total_bytes_copied);
+            if (amt_copied == 0) return total_bytes_copied;
+            total_bytes_copied += amt_copied;
+            in_off += amt_copied;
+            out_off += amt_copied;
+        }
+        return total_bytes_copied;
+    }
+
     pub const WriteFileOptions = struct {
         in_offset: u64 = 0,
 
diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig
index 47cc2f91e9..e2addb9b38 100644
--- a/lib/std/hash_map.zig
+++ b/lib/std/hash_map.zig
@@ -10,7 +10,7 @@ const Wyhash = std.hash.Wyhash;
 const Allocator = mem.Allocator;
 const builtin = @import("builtin");
 
-const want_modification_safety = builtin.mode != .ReleaseFast;
+const want_modification_safety = std.debug.runtime_safety;
 const debug_u32 = if (want_modification_safety) u32 else void;
 
 pub fn AutoHashMap(comptime K: type, comptime V: type) type {
@@ -219,6 +219,10 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3
         return put_result.old_kv;
     }
 
+    pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
+        assert(self.putAssumeCapacity(key, value) == null);
+    }
+
     pub fn get(hm: *const Self, key: K) ?*KV {
         if (hm.entries.len == 0) {
             return null;
diff --git a/src-self-hosted/TypedValue.zig b/src-self-hosted/TypedValue.zig
new file mode 100644
index 0000000000..0651ca9ec9
--- /dev/null
+++ b/src-self-hosted/TypedValue.zig
@@ -0,0 +1,21 @@
+const std = @import("std");
+const Type = @import("type.zig").Type;
+const Value = @import("value.zig").Value;
+const Allocator = std.mem.Allocator;
+const TypedValue = @This();
+
+ty: Type,
+val: Value,
+
+/// Memory management for TypedValue. The main purpose of this type
+/// is to be small and have a deinit() function to free associated resources.
pub const Managed = struct {
+    typed_value: TypedValue,
+    /// If this is `null` then there is no memory management needed.
+    arena: ?*std.heap.ArenaAllocator.State = null,
+
+    pub fn deinit(self: *Managed, allocator: *Allocator) void {
+        if (self.arena) |a| a.promote(allocator).deinit();
+        self.* = undefined;
+    }
+};
diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig
index e32e8cdaea..6deb1a4c8e 100644
--- a/src-self-hosted/ir.zig
+++ b/src-self-hosted/ir.zig
@@ -5,6 +5,7 @@ const ArrayListUnmanaged = std.ArrayListUnmanaged;
 const LinkedList = std.TailQueue;
 const Value = @import("value.zig").Value;
 const Type = @import("type.zig").Type;
+const TypedValue = @import("TypedValue.zig");
 const assert = std.debug.assert;
 const BigIntConst = std.math.big.int.Const;
 const BigIntMutable = std.math.big.int.Mutable;
@@ -167,11 +168,6 @@ pub const Inst = struct {
     };
 };
 
-pub const TypedValue = struct {
-    ty: Type,
-    val: Value,
-};
-
 fn swapRemoveElem(allocator: *Allocator, comptime T: type, item: T, list: *ArrayListUnmanaged(T)) void {
     var i: usize = 0;
     while (i < list.items.len) {
@@ -192,46 +188,125 @@ pub const Module = struct {
     root_scope: *Scope.ZIRModule,
 
     /// Pointer to externally managed resource.
bin_file: *link.ElfFile, - failed_decls: ArrayListUnmanaged(*Decl) = .{}, - failed_fns: ArrayListUnmanaged(*Fn) = .{}, - failed_files: ArrayListUnmanaged(*Scope.ZIRModule) = .{}, + /// It's rare for a decl to be exported, so we save memory by having a sparse map of + /// Decl pointers to details about them being exported. + /// The Export memory is owned by the `export_owners` table; the slice itself is owned by this table. + decl_exports: std.AutoHashMap(*Decl, []*Export), + /// This models the Decls that perform exports, so that `decl_exports` can be updated when a Decl + /// is modified. Note that the key of this table is not the Decl being exported, but the Decl that + /// is performing the export of another Decl. + /// This table owns the Export memory. + export_owners: std.AutoHashMap(*Decl, []*Export), + /// Maps fully qualified namespaced names to the Decl struct for them. decl_table: std.AutoHashMap(Decl.Hash, *Decl), + optimize_mode: std.builtin.Mode, link_error_flags: link.ElfFile.ErrorFlags = .{}, + /// We optimize memory usage for a compilation with no compile errors by storing the + /// error messages and mapping outside of `Decl`. + /// The ErrorMsg memory is owned by the decl, using Module's allocator. + failed_decls: std.AutoHashMap(*Decl, *ErrorMsg), + /// We optimize memory usage for a compilation with no compile errors by storing the + /// error messages and mapping outside of `Fn`. + /// The ErrorMsg memory is owned by the `Fn`, using Module's allocator. + failed_fns: std.AutoHashMap(*Fn, *ErrorMsg), + /// Using a map here for consistency with the other fields here. + /// The ErrorMsg memory is owned by the `Scope.ZIRModule`, using Module's allocator. + failed_files: std.AutoHashMap(*Scope.ZIRModule, *ErrorMsg), + /// Using a map here for consistency with the other fields here. + /// The ErrorMsg memory is owned by the `Export`, using Module's allocator. + failed_exports: std.AutoHashMap(*Export, *ErrorMsg), + + pub const Export = struct { + options: std.builtin.ExportOptions, + /// Byte offset into the file that contains the export directive. + src: usize, + /// Represents the position of the export, if any, in the output file. + link: link.ElfFile.Export, + /// The Decl that performs the export. Note that this is *not* the Decl being exported. + owner_decl: *Decl, + status: enum { in_progress, failed, complete }, + }; + pub const Decl = struct { - /// Contains the memory for `typed_value` and this `Decl` itself. - /// If the Decl is a function, also contains that memory. - /// If the decl has any export nodes, also contains that memory. - /// TODO look into using a more memory efficient arena that will cost less bytes per decl. - /// This one has a minimum allocation of 4096 bytes. - arena: std.heap.ArenaAllocator.State, /// This name is relative to the containing namespace of the decl. It uses a null-termination /// to save bytes, since there can be a lot of decls in a compilation. The null byte is not allowed /// in symbol names, because executable file formats use null-terminated strings for symbol names. + /// All Decls have names, even values that are not bound to a zig namespace. This is necessary for + /// mapping them to an address in the output file. + /// Memory owned by this decl, using Module's allocator. name: [*:0]const u8, - /// It's rare for a decl to be exported, and it's even rarer for a decl to be mapped to more - /// than one export, so we use a linked list to save memory. 
-        export_node: ?*LinkedList(std.builtin.ExportOptions).Node = null,
+        /// The direct parent container of the Decl. This field will need to get more fleshed out when
+        /// self-hosted supports proper struct types and Zig AST => ZIR.
+        /// Reference to externally owned memory.
+        scope: *Scope.ZIRModule,
         /// Byte offset into the source file that contains this declaration.
         /// This is the base offset that src offsets within this Decl are relative to.
         src: usize,
+        /// The most recent value of the Decl after a successful semantic analysis.
+        /// The tag for this union is determined by the tag value of the analysis field.
+        typed_value: union {
+            never_succeeded,
+            most_recent: TypedValue.Managed,
+        },
         /// Represents the "shallow" analysis status. For example, for decls that are functions,
         /// the function type is analyzed with this set to `in_progress`, however, the semantic
         /// analysis of the function body is performed with this value set to `success`. Functions
         /// have their own analysis status field.
-        analysis: union(enum) {
-            in_progress,
-            failure: ErrorMsg,
-            success: TypedValue,
+        analysis: enum {
+            initial_in_progress,
+            /// This Decl might be OK but it depends on another one which did not successfully complete
+            /// semantic analysis. This Decl never had a value computed.
+            initial_dependency_failure,
+            /// Semantic analysis failure. This Decl never had a value computed.
+            /// There will be a corresponding ErrorMsg in Module.failed_decls.
+            initial_sema_failure,
+            /// In this case the `typed_value.most_recent` can still be accessed.
+            /// There will be a corresponding ErrorMsg in Module.failed_decls.
+            codegen_failure,
+            /// This Decl might be OK but it depends on another one which did not successfully complete
+            /// semantic analysis. There is a most recent value available.
+            repeat_dependency_failure,
+            /// Semantic analysis failure, but the `typed_value.most_recent` can be accessed.
+            /// There will be a corresponding ErrorMsg in Module.failed_decls.
+            repeat_sema_failure,
+            /// Completed successfully before; the `typed_value.most_recent` can be accessed, and
+            /// new semantic analysis is in progress.
+            repeat_in_progress,
+            /// Everything is done and updated.
+            complete,
         },
-        /// The direct container of the Decl. This field will need to get more fleshed out when
-        /// self-hosted supports proper struct types and Zig AST => ZIR.
-        scope: *Scope.ZIRModule,
+
+        /// Represents the position of the code, if any, in the output file.
+        /// This is populated regardless of semantic analysis and code generation.
+        /// This value is `undefined` if the type has no runtime bits.
+        link: link.ElfFile.Decl,
+
+        /// The set of other decls whose typed_value could possibly change if this Decl's
+        /// typed_value is modified.
+        /// TODO look into using a lightweight map/set data structure rather than a linear array.
+        dependants: ArrayListUnmanaged(*Decl) = .{},
+
+        pub fn typedValue(self: Decl) ?TypedValue {
+            switch (self.analysis) {
+                .initial_in_progress,
+                .initial_dependency_failure,
+                .initial_sema_failure,
+                => return null,
+                .codegen_failure,
+                .repeat_dependency_failure,
+                .repeat_sema_failure,
+                .repeat_in_progress,
+                .complete,
+                => return self.typed_value.most_recent.typed_value,
+            }
+        }
 
         pub fn destroy(self: *Decl, allocator: *Allocator) void {
-            var arena = self.arena.promote(allocator);
-            arena.deinit();
+            allocator.free(mem.spanZ(self.name));
+            if (self.typedValue() != null) self.typed_value.most_recent.deinit(allocator);
+            allocator.destroy(self);
         }
 
         pub const Hash = [16]u8;
@@ -252,8 +327,10 @@ pub const Module = struct {
     pub const Fn = struct {
         fn_type: Type,
         analysis: union(enum) {
+            queued,
             in_progress: *Analysis,
-            failure: ErrorMsg,
+            /// There will be a corresponding ErrorMsg in Module.failed_fns
+            failure,
             success: Body,
         },
         /// The direct container of the Fn. This field will need to get more fleshed out when
@@ -290,68 +367,36 @@ pub const Module = struct {
         /// Relative to the owning package's root_src_dir.
         /// Reference to external memory, not owned by ZIRModule.
         sub_file_path: []const u8,
-        contents: union(enum) {
+        source: union {
             unloaded,
-            parse_failure: ParseFailure,
-            success: Contents,
+            bytes: [:0]const u8,
         },
-
-        pub const ParseFailure = struct {
-            source: [:0]const u8,
-            errors: []ErrorMsg,
-
-            pub fn deinit(self: *ParseFailure, allocator: *Allocator) void {
-                allocator.free(self.errors);
-                allocator.free(source);
-            }
-        };
-        pub const Contents = struct {
-            source: [:0]const u8,
+        contents: union {
+            not_available,
             module: *text.Module,
-        };
+        },
+        status: enum {
+            unloaded,
+            unloaded_parse_failure,
+            loaded_parse_failure,
+            loaded_success,
+        },
 
         pub fn deinit(self: *ZIRModule, allocator: *Allocator) void {
-            switch (self.contents) {
-                .unloaded => {},
-                .parse_failure => |pf| pd.deinit(allocator),
-                .success => |contents| {
-                    allocator.free(contents.source);
-                    contents.src_zir_module.deinit(allocator);
+            switch (self.status) {
+                .unloaded,
+                .unloaded_parse_failure,
+                => {},
+                .loaded_success => {
+                    allocator.free(self.source.bytes);
+                    self.contents.module.deinit(allocator);
+                },
+                .loaded_parse_failure => {
+                    allocator.free(self.source.bytes);
                 },
             }
             self.* = undefined;
         }
-
-        pub fn loadContents(self: *ZIRModule, allocator: *Allocator) !*Contents {
-            if (self.contents) |contents| return contents;
-
-            const max_size = std.math.maxInt(u32);
-            const source = try self.root_pkg_dir.readFileAllocOptions(allocator, self.root_src_path, max_size, 1, 0);
-            errdefer allocator.free(source);
-
-            var errors = std.ArrayList(ErrorMsg).init(allocator);
-            defer errors.deinit();
-
-            var src_zir_module = try text.parse(allocator, source, &errors);
-            errdefer src_zir_module.deinit(allocator);
-
-            switch (self.contents) {
-                .parse_failure => |pf| pf.deinit(allocator),
-                .unloaded => {},
-                .success => unreachable,
-            }
-
-            if (errors.items.len != 0) {
-                self.contents = .{ .parse_failure = errors.toOwnedSlice() };
-                return error.ParseFailure;
-            }
-            self.contents = .{
-                .success = .{
-                    .source = source,
-                    .module = src_zir_module,
-                },
-            };
-            return &self.contents.success;
-        }
     };
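The per-decl memory strategy above leans on `ArenaAllocator.State`, the compact form split out earlier in this series: only a few words are stored per decl, and a full `ArenaAllocator` is rebuilt with `promote` whenever allocation or cleanup is needed. A hypothetical round-trip showing the idea (not code from the patch):

```
const std = @import("std");

test "arena state promote round-trip" {
    const gpa = std.testing.allocator;

    var arena = std.heap.ArenaAllocator.init(gpa);
    const msg = try std.fmt.allocPrint(&arena.allocator, "decl {}", .{42});
    std.testing.expect(std.mem.eql(u8, msg, "decl 42"));

    // Keep only the small state; the full ArenaAllocator can be rebuilt on demand.
    const state = arena.state;

    // promote() reattaches the child allocator so deinit can free everything.
    state.promote(gpa).deinit();
}
```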
    /// This is a temporary structure, references to it are valid only
@@ -436,7 +481,7 @@ pub const Module = struct {
 
         // Analyze the root source file now.
         self.analyzeRoot(self.root_scope) catch |err| switch (err) {
             error.AnalysisFail => {
-                assert(self.totalErrorCount() != 0);
+                assert(self.failed_files.size != 0);
             },
             else => |e| return e,
         };
 
     }
 
     pub fn totalErrorCount(self: *Module) usize {
-        return self.failed_decls.items.len +
-            self.failed_fns.items.len +
-            self.failed_decls.items.len +
+        return self.failed_decls.size +
+            self.failed_fns.size +
+            self.failed_files.size +
+            self.failed_exports.size +
             @boolToInt(self.link_error_flags.no_entry_point_found);
     }
 
         var errors = std.ArrayList(AllErrors.Message).init(self.allocator);
         defer errors.deinit();
 
-        for (self.failed_files.items) |scope| {
-            const source = scope.parse_failure.source;
-            for (scope.parse_failure.errors) |parse_error| {
-                AllErrors.add(&arena, &errors, scope.sub_file_path, source, parse_error);
+        {
+            var it = self.failed_files.iterator();
+            while (it.next()) |kv| {
+                const scope = kv.key;
+                const err_msg = kv.value;
+                const source = scope.source.bytes;
+                try AllErrors.add(&arena, &errors, scope.sub_file_path, source, err_msg);
             }
         }
-
-        for (self.failed_fns.items) |func| {
-            const source = func.scope.success.source;
-            for (func.analysis.failure) |err_msg| {
-                AllErrors.add(&arena, &errors, func.scope.sub_file_path, source, err_msg);
+        {
+            var it = self.failed_fns.iterator();
+            while (it.next()) |kv| {
+                const func = kv.key;
+                const err_msg = kv.value;
+                const source = func.scope.source.bytes;
+                try AllErrors.add(&arena, &errors, func.scope.sub_file_path, source, err_msg);
             }
         }
-
-        for (self.failed_decls.items) |decl| {
-            const source = decl.scope.success.source;
-            for (decl.analysis.failure) |err_msg| {
-                AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg);
+        {
+            var it = self.failed_decls.iterator();
+            while (it.next()) |kv| {
+                const decl = kv.key;
+                const err_msg = kv.value;
+                const source = decl.scope.source.bytes;
+                try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg);
             }
         }
+        {
+            var it = self.failed_exports.iterator();
+            while (it.next()) |kv| {
+                const decl = kv.key.owner_decl;
+                const err_msg = kv.value;
+                const source = decl.scope.source.bytes;
+                try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg);
+            }
+        }
 
         if (self.link_error_flags.no_entry_point_found) {
             try errors.append(.{
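All four error tables are now walked the same way; the `iterator()`/`kv` shape used above is this era's std HashMap API. A standalone sketch of the same access pattern (illustrative keys and values):

```
const std = @import("std");

test "hash map iteration, as used when collecting errors" {
    var map = std.AutoHashMap(u32, []const u8).init(std.testing.allocator);
    defer map.deinit();

    _ = try map.put(1, "error: something failed");

    var count: usize = 0;
    var it = map.iterator();
    while (it.next()) |kv| {
        std.testing.expect(kv.key == 1);
        count += 1;
    }
    std.testing.expect(count == map.size);
}
```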
@@ -508,23 +570,81 @@ pub const Module = struct {
         // Here we simulate adding a source file which was previously not part of the compilation,
         // which means scanning the decls looking for exports.
         // TODO also identify decls that need to be deleted.
-        const contents = blk: {
-            // Clear parse errors.
-            swapRemoveElem(self.allocator, *Scope.ZIRModule, root_scope, self.failed_files);
-            try self.failed_files.ensureCapacity(self.allocator, self.failed_files.items.len + 1);
-            break :blk root_scope.loadContents(self.allocator) catch |err| switch (err) {
-                error.ParseFailure => {
-                    self.failed_files.appendAssumeCapacity(root_scope);
-                    return error.AnalysisFail;
-                },
-                else => |e| return e,
-            };
-        };
+        const src_module = switch (root_scope.status) {
+            .unloaded => blk: {
+                try self.failed_files.ensureCapacity(self.failed_files.size + 1);
+
+                var keep_source = false;
+                const source = try self.root_pkg_dir.readFileAllocOptions(
+                    self.allocator,
+                    self.root_src_path,
+                    std.math.maxInt(u32),
+                    1,
+                    0,
+                );
+                defer if (!keep_source) self.allocator.free(source);
+
+                var keep_zir_module = false;
+                const zir_module = try self.allocator.create(text.Module);
+                defer if (!keep_zir_module) self.allocator.destroy(zir_module);
+
+                zir_module.* = try text.parse(self.allocator, source);
+                defer if (!keep_zir_module) zir_module.deinit(self.allocator);
+
+                if (zir_module.error_msg) |src_err_msg| {
+                    self.failed_files.putAssumeCapacityNoClobber(
+                        root_scope,
+                        try ErrorMsg.create(self.allocator, src_err_msg.byte_offset, "{}", .{src_err_msg.msg}),
+                    );
+                    root_scope.status = .loaded_parse_failure;
+                    root_scope.source = .{ .bytes = source };
+                    keep_source = true;
+                    return error.AnalysisFail;
+                }
+
+                root_scope.status = .loaded_success;
+                root_scope.source = .{ .bytes = source };
+                keep_source = true;
+                root_scope.contents = .{ .module = zir_module };
+                keep_zir_module = true;
+
+                break :blk zir_module;
+            },
+
+            .unloaded_parse_failure, .loaded_parse_failure => return error.AnalysisFail,
+            .loaded_success => root_scope.contents.module,
+        };
+
+        // Here we ensure enough queue capacity to store all the decls, so that later we can use
+        // appendAssumeCapacity.
+        try self.analysis_queue.ensureCapacity(self.analysis_queue.items.len + src_module.decls.len);
 
-        for (contents.module.decls) |decl| {
+        for (src_module.decls) |decl| {
             if (decl.cast(text.Inst.Export)) |export_inst| {
                 try analyzeExport(self, &root_scope.base, export_inst);
             }
         }
+
+        while (self.analysis_queue.popOrNull()) |work_item| {
+            switch (work_item) {
+                .decl => |decl| switch (decl.analysis) {
+                    .success => |typed_value| {
+                        var arena = decl.arena.promote(self.allocator);
+                        const update_result = self.bin_file.updateDecl(
+                            self.*,
+                            typed_value,
+                            decl.export_node,
+                            decl.fullyQualifiedNameHash(),
+                            &arena.allocator,
+                        );
+                        decl.arena = arena.state;
+                        if (try update_result) |err_msg| {
+                            decl.analysis = .{ .codegen_failure = err_msg };
+                        }
+                    },
+                },
+            }
+        }
     }
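The `keep_source`/`keep_zir_module` flags above are a manual ownership-transfer pattern: every early `return` or failed `try` frees the resource via the `defer`, and flipping the flag hands ownership off once the struct takes it. A self-contained sketch of the same idiom (the helper names are hypothetical):

```
const std = @import("std");

fn mightFail(ok: bool) !void {
    if (!ok) return error.Failed;
}

// `buf` is freed automatically on any error path, and kept on success.
fn loadExample(allocator: *std.mem.Allocator, ok: bool) ![]u8 {
    var keep_buf = false;
    const buf = try allocator.alloc(u8, 16);
    defer if (!keep_buf) allocator.free(buf);

    try mightFail(ok); // buf is freed if this fails

    keep_buf = true; // ownership transfers to the caller
    return buf;
}

test "keep-flag ownership transfer" {
    const gpa = std.testing.allocator;
    std.testing.expectError(error.Failed, loadExample(gpa, false));
    const buf = try loadExample(gpa, true);
    gpa.free(buf);
}
```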
 
     fn resolveDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl {
@@ -548,21 +668,41 @@ pub const Module = struct {
             break :blk new_decl;
         };
 
-        var decl_scope: Scope.DeclAnalysis = .{ .decl = new_decl };
+        swapRemoveElem(self.allocator, *Scope.ZIRModule, root_scope, self.failed_decls);
+        var decl_scope: Scope.DeclAnalysis = .{
+            .base = .{ .parent = scope },
+            .decl = new_decl,
+        };
         const typed_value = self.analyzeInstConst(&decl_scope.base, old_inst) catch |err| switch (err) {
-            error.AnalysisFail => return error.AnalysisFail,
+            error.AnalysisFail => {
+                assert(new_decl.analysis == .failure);
+                return error.AnalysisFail;
+            },
             else => |e| return e,
         };
         new_decl.analysis = .{ .success = typed_value };
-        if (try self.bin_file.updateDecl(self.*, typed_value, new_decl.export_node, hash)) |err_msg| {
-            new_decl.analysis = .{ .success = typed_value };
-        } else |err| {
-            return err;
-        }
+        // We ensureCapacity when scanning for decls.
+        self.analysis_queue.appendAssumeCapacity(.{ .decl = new_decl });
 
             return new_decl;
         }
     }
 
+    fn resolveCompleteDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl {
+        const decl = try self.resolveDecl(scope, old_inst);
+        switch (decl.analysis) {
+            .initial_in_progress => unreachable,
+            .repeat_in_progress => unreachable,
+            .initial_dependency_failure,
+            .repeat_dependency_failure,
+            .initial_sema_failure,
+            .repeat_sema_failure,
+            .codegen_failure,
+            => return error.AnalysisFail,
+
+            .complete => return decl,
+        }
+    }
+
     fn resolveInst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Inst {
         if (scope.cast(Scope.Block)) |block| {
             if (block.func.inst_table.get(old_inst)) |kv| {
@@ -570,7 +710,7 @@ pub const Module = struct {
             }
         }
 
-        const decl = try self.resolveDecl(scope, old_inst);
+        const decl = try self.resolveCompleteDecl(scope, old_inst);
         const decl_ref = try self.analyzeDeclRef(scope, old_inst.src, decl);
         return self.analyzeDeref(scope, old_inst.src, decl_ref);
     }
@@ -621,29 +761,52 @@ pub const Module = struct {
     }
 
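Before the export analysis below: why two tables? `decl_exports` is keyed by the decl *being exported* (what the linker needs when it rewrites that decl's symbols), while `export_owners` is keyed by the decl *performing* the export (what incremental compilation needs when that decl is re-analyzed and its old exports must be torn down). A toy sketch of the double bookkeeping, using integer ids instead of `*Decl` (hypothetical, not patch code):

```
const std = @import("std");

test "export double bookkeeping" {
    const gpa = std.testing.allocator;
    // Keyed by the exported decl: "who do I have to emit symbols for?"
    var decl_exports = std.AutoHashMap(u32, []const u8).init(gpa);
    defer decl_exports.deinit();
    // Keyed by the exporting decl: "which exports die if this decl changes?"
    var export_owners = std.AutoHashMap(u32, []const u8).init(gpa);
    defer export_owners.deinit();

    // Decl #7 contains an export of decl #3 under the name "main":
    _ = try decl_exports.put(3, "main");
    _ = try export_owners.put(7, "main");

    std.testing.expect(decl_exports.get(3) != null);
    std.testing.expect(export_owners.get(7) != null);
}
```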
     fn analyzeExport(self: *Module, scope: *Scope, export_inst: *text.Inst.Export) !void {
+        try self.decl_exports.ensureCapacity(self.decl_exports.size + 1);
+        try self.export_owners.ensureCapacity(self.export_owners.size + 1);
         const symbol_name = try self.resolveConstString(scope, export_inst.positionals.symbol_name);
-        const decl = try self.resolveDecl(scope, export_inst.positionals.value);
-
-        switch (decl.analysis) {
-            .in_progress => unreachable,
-            .failure => return error.AnalysisFail,
-            .success => |typed_value| switch (typed_value.ty.zigTypeTag()) {
-                .Fn => {},
-                else => return self.fail(
-                    scope,
-                    export_inst.positionals.value.src,
-                    "unable to export type '{}'",
-                    .{typed_value.ty},
-                ),
-            },
+        const exported_decl = try self.resolveCompleteDecl(scope, export_inst.positionals.value);
+        const typed_value = exported_decl.typed_value.most_recent.typed_value;
+        switch (typed_value.ty.zigTypeTag()) {
+            .Fn => {},
+            else => return self.fail(
+                scope,
+                export_inst.positionals.value.src,
+                "unable to export type '{}'",
+                .{typed_value.ty},
+            ),
         }
-        const Node = LinkedList(std.builtin.ExportOptions).Node;
-        export_node = try decl.arena.promote(self.allocator).allocator.create(Node);
-        export_node.* = .{ .data = .{ .name = symbol_name } };
-        decl.export_node = export_node;
+        const new_export = try self.allocator.create(Export);
+        errdefer self.allocator.destroy(new_export);
 
-        // TODO Avoid double update in the case of exporting a decl that we just created.
-        self.bin_file.updateDeclExports();
+        const owner_decl = scope.getDecl();
+
+        new_export.* = .{
+            .options = .{ .name = symbol_name },
+            .src = export_inst.base.src,
+            .link = .{},
+            .owner_decl = owner_decl,
+            .status = .in_progress,
+        };
+
+        // Add to export_owners table.
+        const eo_gop = self.export_owners.getOrPut(owner_decl) catch unreachable;
+        if (!eo_gop.found_existing) {
+            eo_gop.kv.value = &[0]*Export{};
+        }
+        eo_gop.kv.value = try self.allocator.realloc(eo_gop.kv.value, eo_gop.kv.value.len + 1);
+        eo_gop.kv.value[eo_gop.kv.value.len - 1] = new_export;
+        errdefer eo_gop.kv.value = self.allocator.shrink(eo_gop.kv.value, eo_gop.kv.value.len - 1);
+
+        // Add to exported_decl table.
+        const de_gop = self.decl_exports.getOrPut(exported_decl) catch unreachable;
+        if (!de_gop.found_existing) {
+            de_gop.kv.value = &[0]*Export{};
+        }
+        de_gop.kv.value = try self.allocator.realloc(de_gop.kv.value, de_gop.kv.value.len + 1);
+        de_gop.kv.value[de_gop.kv.value.len - 1] = new_export;
+        errdefer de_gop.kv.value = self.allocator.shrink(de_gop.kv.value, de_gop.kv.value.len - 1);
+
+        try self.bin_file.updateDeclExports(self, exported_decl, de_gop.kv.value);
     }
 
     /// TODO should not need the cast on the last parameter at the callsites
@@ -1636,6 +1799,31 @@ pub const Module = struct {
 pub const ErrorMsg = struct {
     byte_offset: usize,
     msg: []const u8,
+
+    pub fn create(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !*ErrorMsg {
+        const self = try allocator.create(ErrorMsg);
+        errdefer allocator.destroy(self);
+        self.* = try init(allocator, byte_offset, format, args);
+        return self;
+    }
+
+    /// Assumes the ErrorMsg struct and msg were both allocated with allocator.
+    pub fn destroy(self: *ErrorMsg, allocator: *Allocator) void {
+        self.deinit(allocator);
+        allocator.destroy(self);
+    }
+
+    pub fn init(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !ErrorMsg {
+        return ErrorMsg{
+            .byte_offset = byte_offset,
+            .msg = try std.fmt.allocPrint(allocator, format, args),
+        };
+    }
+
+    pub fn deinit(self: *ErrorMsg, allocator: *Allocator) void {
+        allocator.free(self.msg);
+        self.* = undefined;
+    }
 };
 
 pub fn main() anyerror!void {
diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig
index 762eb07b42..d87fef7a2d 100644
--- a/src-self-hosted/ir/text.zig
+++ b/src-self-hosted/ir/text.zig
@@ -406,8 +406,8 @@ pub const ErrorMsg = struct {
 
 pub const Module = struct {
     decls: []*Inst,
-    errors: []ErrorMsg,
     arena: std.heap.ArenaAllocator.State,
+    error_msg: ?ErrorMsg = null,
 
     pub const Body = struct {
         instructions: []*Inst,
@@ -415,7 +415,6 @@ pub const Module = struct {
 
     pub fn deinit(self: *Module, allocator: *Allocator) void {
         allocator.free(self.decls);
-        allocator.free(self.errors);
         self.arena.promote(allocator).deinit();
         self.* = undefined;
     }
@@ -576,22 +575,21 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module
         .i = 0,
         .source = source,
         .global_name_map = &global_name_map,
-        .errors = .{},
         .decls = .{},
     };
     errdefer parser.arena.deinit();
 
     parser.parseRoot() catch |err| switch (err) {
         error.ParseFailure => {
-            assert(parser.errors.items.len != 0);
+            assert(parser.error_msg != null);
         },
         else => |e| return e,
     };
 
     return Module{
         .decls = parser.decls.toOwnedSlice(allocator),
-        .errors = parser.errors.toOwnedSlice(allocator),
         .arena = parser.arena.state,
+        .error_msg = parser.error_msg,
     };
 }
 
@@ -600,9 +598,9 @@ const Parser = struct {
     arena: std.heap.ArenaAllocator,
     i: usize,
     source: [:0]const u8,
-    errors: std.ArrayListUnmanaged(ErrorMsg),
     decls: std.ArrayListUnmanaged(*Inst),
     global_name_map: *std.StringHashMap(usize),
+    error_msg: ?ErrorMsg = null,
 
     const Body = struct {
         instructions: std.ArrayList(*Inst),
@@ -776,10 +774,9 @@ const Parser = struct {
 
     fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError {
         @setCold(true);
-        const msg = try std.fmt.allocPrint(&self.arena.allocator, format, args);
-        (try self.errors.addOne()).* = .{
+        self.error_msg = ErrorMsg{
             .byte_offset = self.i,
-            .msg = msg,
+            .msg = try std.fmt.allocPrint(&self.arena.allocator, format, args),
         };
         return error.ParseFailure;
     }
@@ -971,7 +968,6 @@ pub fn emit_zir(allocator: *Allocator, old_module: ir.Module)
!Module { return Module{ .decls = ctx.decls.toOwnedSlice(), .arena = ctx.arena, - .errors = &[0]ErrorMsg{}, }; } diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index c9e87d4092..03b2b61535 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -130,6 +130,20 @@ pub const ElfFile = struct { no_entry_point_found: bool = false, }; + /// TODO it's too bad this optional takes up double the memory it should + pub const Decl = struct { + /// Each decl always gets a local symbol with the fully qualified name. + /// The vaddr and size are found here directly. + /// The file offset is found by computing the vaddr offset from the section vaddr + /// the symbol references, and adding that to the file offset of the section. + local_sym_index: ?usize = null, + }; + + /// TODO it's too bad this optional takes up double the memory it should + pub const Export = struct { + sym_index: ?usize = null, + }; + pub fn deinit(self: *ElfFile) void { self.sections.deinit(self.allocator); self.program_headers.deinit(self.allocator); @@ -138,7 +152,7 @@ pub const ElfFile = struct { self.offset_table.deinit(self.allocator); } - // `expand_num / expand_den` is the factor of padding when allocation + // `alloc_num / alloc_den` is the factor of padding when allocation const alloc_num = 4; const alloc_den = 3; @@ -216,12 +230,21 @@ pub const ElfFile = struct { } fn makeString(self: *ElfFile, bytes: []const u8) !u32 { + try self.shstrtab.ensureCapacity(self.allocator, self.shstrtab.items.len + bytes.len + 1); const result = self.shstrtab.items.len; - try self.shstrtab.appendSlice(bytes); - try self.shstrtab.append(0); + self.shstrtab.appendSliceAssumeCapacity(bytes); + self.shstrtab.appendAssumeCapacity(0); return @intCast(u32, result); } + fn updateString(self: *ElfFile, old_str_off: u32, new_name: []const u8) !u32 { + const existing_name = self.getString(old_str_off); + if (mem.eql(u8, existing_name, new_name)) { + return old_str_off; + } + return self.makeString(new_name); + } + pub fn populateMissingMetadata(self: *ElfFile) !void { const small_ptr = switch (self.ptr_width) { .p32 => true, @@ -575,166 +598,200 @@ pub const ElfFile = struct { try self.file.pwriteAll(hdr_buf[0..index], 0); } - /// TODO Look into making this smaller to save memory. - /// Lots of redundant info here with the data stored in symbol structs. - const DeclSymbol = struct { - symbol_indexes: []usize, - vaddr: u64, - file_offset: u64, - size: u64, - }; - const AllocatedBlock = struct { vaddr: u64, file_offset: u64, size_capacity: u64, }; - fn allocateDeclSymbol(self: *ElfFile, size: u64) AllocatedBlock { + fn allocateTextBlock(self: *ElfFile, new_block_size: u64) !AllocatedBlock { const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; - todo(); - //{ - // // Now that we know the code size, we need to update the program header for executable code - // phdr.p_memsz = vaddr - phdr.p_vaddr; - // phdr.p_filesz = phdr.p_memsz; + const shdr = &self.sections.items[self.text_section_index.?]; - // const shdr = &self.sections.items[self.text_section_index.?]; - // shdr.sh_size = phdr.p_filesz; + const text_capacity = self.allocatedSize(shdr.sh_offset); + // TODO instead of looping here, maintain a free list and a pointer to the end. 
+        const end_vaddr = blk: {
+            var start: u64 = 0;
+            var size: u64 = 0;
+            for (self.symbols.items) |sym| {
+                if (sym.st_value > start) {
+                    start = sym.st_value;
+                    size = sym.st_size;
+                }
+            }
+            break :blk start + (size * alloc_num / alloc_den);
+        };
 
-        //    self.phdr_table_dirty = true; // TODO look into making only the one program header dirty
-        //    self.shdr_table_dirty = true; // TODO look into making only the one section dirty
-        //}
+        const text_size = end_vaddr - phdr.p_vaddr;
+        const needed_size = text_size + new_block_size;
+        if (needed_size > text_capacity) {
+            // Must move the entire text section.
+            const new_offset = self.findFreeSpace(needed_size, 0x1000);
+            const amt = try self.file.copyRangeAll(shdr.sh_offset, self.file, new_offset, text_size);
+            if (amt != text_size) return error.InputOutput;
+            shdr.sh_offset = new_offset;
+        }
+        // Now that we know the code size, we need to update the program header for executable code
+        shdr.sh_size = needed_size;
+        phdr.p_memsz = needed_size;
+        phdr.p_filesz = needed_size;
 
-        //return self.writeSymbols();
+        self.phdr_table_dirty = true; // TODO look into making only the one program header dirty
+        self.shdr_table_dirty = true; // TODO look into making only the one section dirty
+
+        // The new block begins at the previous end of the text section.
+        return AllocatedBlock{
+            .vaddr = end_vaddr,
+            .file_offset = shdr.sh_offset + (end_vaddr - phdr.p_vaddr),
+            .size_capacity = self.allocatedSize(shdr.sh_offset) - text_size,
+        };
     }
 
-    fn findAllocatedBlock(self: *ElfFile, vaddr: u64) AllocatedBlock {
-        todo();
+    fn findAllocatedTextBlock(self: *ElfFile, sym: elf.Elf64_Sym) AllocatedBlock {
+        const phdr = &self.program_headers.items[self.phdr_load_re_index.?];
+        const shdr = &self.sections.items[self.text_section_index.?];
+
+        // Find the next sym after this one.
+        // TODO look into using a hash map to speed up perf.
+        const text_capacity = self.allocatedSize(shdr.sh_offset);
+        var next_vaddr_start = phdr.p_vaddr + text_capacity;
+        for (self.symbols.items) |elem| {
+            if (elem.st_value <= sym.st_value) continue;
+            if (elem.st_value < next_vaddr_start) next_vaddr_start = elem.st_value;
+        }
+        return .{
+            .vaddr = sym.st_value,
+            .file_offset = shdr.sh_offset + (sym.st_value - phdr.p_vaddr),
+            .size_capacity = next_vaddr_start - sym.st_value,
+        };
     }
 
-    pub fn updateDecl(
-        self: *ElfFile,
-        module: ir.Module,
-        typed_value: ir.TypedValue,
-        decl_export_node: ?*std.LinkedList(std.builtin.ExportOptions).Node,
-        hash: ir.Module.Decl.Hash,
-        err_msg_allocator: *Allocator,
-    ) !?ir.ErrorMsg {
+    pub fn updateDecl(self: *ElfFile, module: *ir.Module, decl: *ir.Module.Decl) !void {
         var code = std.ArrayList(u8).init(self.allocator);
         defer code.deinit();
 
-        const err_msg = try codegen.generateSymbol(typed_value, module, &code, err_msg_allocator);
-        if (err_msg != null) |em| return em;
-
-        const export_count = blk: {
-            var export_node = decl_export_node;
-            var i: usize = 0;
-            while (export_node) |node| : (export_node = node.next) i += 1;
-            break :blk i;
-        };
-
-        // Find or create a symbol from the decl
-        var valid_sym_index_len: usize = 0;
-        const decl_symbol = blk: {
-            if (self.decl_table.getValue(hash)) |decl_symbol| {
-                valid_sym_index_len = decl_symbol.symbol_indexes.len;
-                decl_symbol.symbol_indexes = try self.allocator.realloc(usize, export_count);
-
-                const existing_block = self.findAllocatedBlock(decl_symbol.vaddr);
-                if (code.items.len > existing_block.size_capacity) {
-                    const new_block = self.allocateDeclSymbol(code.items.len);
-                    decl_symbol.vaddr = new_block.vaddr;
-                    decl_symbol.file_offset = new_block.file_offset;
-                    decl_symbol.size = code.items.len;
-                }
-                break :blk decl_symbol;
-            } else {
-                const new_block = self.allocateDeclSymbol(code.items.len);
-
-                const decl_symbol = try self.allocator.create(DeclSymbol);
-                errdefer self.allocator.destroy(decl_symbol);
-
-                decl_symbol.* = .{
-                    .symbol_indexes = try self.allocator.alloc(usize, export_count),
-                    .vaddr = new_block.vaddr,
-                    .file_offset = new_block.file_offset,
-                    .size = code.items.len,
-                };
-                errdefer self.allocator.free(decl_symbol.symbol_indexes);
-
-                try self.decl_table.put(hash, decl_symbol);
-                break :blk decl_symbol;
-            }
-        };
-
-        // Allocate new symbols.
-        {
-            var i: usize = valid_sym_index_len;
-            const old_len = self.symbols.items.len;
-            try self.symbols.resize(old_len + (decl_symbol.symbol_indexes.len - i));
-            while (i < decl_symbol.symbol_indexes) : (i += 1) {
-                decl_symbol.symbol_indexes[i] = old_len + i;
-            }
+        const typed_value = decl.typed_value.most_recent.typed_value;
+        const err_msg = try codegen.generateSymbol(typed_value, module, &code, module.allocator);
+        if (err_msg) |em| {
+            decl.analysis = .codegen_failure;
+            _ = try module.failed_decls.put(decl, em);
+            return;
         }
 
-        var export_node = decl_export_node;
-        var export_index: usize = 0;
-        while (export_node) |node| : ({
-            export_node = node.next;
-            export_index += 1;
-        }) {
-            if (node.data.section) |section_name| {
+        const file_offset = blk: {
+            const code_size = code.items.len;
+            const stt_bits: u8 = switch (typed_value.ty.zigTypeTag()) {
+                .Fn => elf.STT_FUNC,
+                else => elf.STT_OBJECT,
+            };
+
+            if (decl.link.local_sym_index) |local_sym_index| {
+                const local_sym = &self.symbols.items[local_sym_index];
+                const existing_block = self.findAllocatedTextBlock(local_sym.*);
+                const file_offset = if (code_size > existing_block.size_capacity) fo: {
+                    const new_block = try self.allocateTextBlock(code_size);
+                    local_sym.st_value = new_block.vaddr;
+                    local_sym.st_size = code_size;
+                    break :fo new_block.file_offset;
+                } else existing_block.file_offset;
+                local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(decl.name));
+                local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits;
+                // TODO this write could be avoided if no fields of the symbol were changed.
+                try self.writeSymbol(local_sym_index);
+                break :blk file_offset;
+            } else {
+                try self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + 1);
+                const decl_name = mem.spanZ(decl.name);
+                const name_str_index = try self.makeString(decl_name);
+                const new_block = try self.allocateTextBlock(code_size);
+                const local_sym_index = self.symbols.items.len;
+
+                self.symbols.appendAssumeCapacity(.{
+                    .st_name = name_str_index,
+                    .st_info = (elf.STB_LOCAL << 4) | stt_bits,
+                    .st_other = 0,
+                    .st_shndx = self.text_section_index.?,
+                    .st_value = new_block.vaddr,
+                    .st_size = code_size,
+                });
+                errdefer self.symbols.shrink(self.allocator, self.symbols.items.len - 1);
+                try self.writeSymbol(local_sym_index);
+
+                self.symbol_count_dirty = true;
+                decl.link.local_sym_index = local_sym_index;
+
+                break :blk new_block.file_offset;
+            }
+        };
+
+        try self.file.pwriteAll(code.items, file_offset);
+
+        // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated.
+        const decl_exports = if (module.decl_exports.get(decl)) |kv| kv.value else &[0]*ir.Module.Export{};
+        return self.updateDeclExports(module, decl, decl_exports);
+    }
+
+    /// Must be called only after a successful call to `updateDecl`.
+    pub fn updateDeclExports(
+        self: *ElfFile,
+        module: *ir.Module,
+        decl: *const ir.Module.Decl,
+        exports: []const *ir.Module.Export,
+    ) !void {
+        try self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + exports.len);
+        const typed_value = decl.typed_value.most_recent.typed_value;
+        const decl_sym = self.symbols.items[decl.link.local_sym_index.?];
+
+        for (exports) |exp| {
+            if (exp.options.section) |section_name| {
                 if (!mem.eql(u8, section_name, ".text")) {
-                    try errors.ensureCapacity(errors.items.len + 1);
-                    errors.appendAssumeCapacity(.{
-                        .byte_offset = 0,
-                        .msg = try std.fmt.allocPrint(errors.allocator, "Unimplemented: ExportOptions.section", .{}),
-                    });
+                    try module.failed_exports.ensureCapacity(module.failed_exports.size + 1);
+                    module.failed_exports.putAssumeCapacityNoClobber(
+                        exp,
+                        try ir.ErrorMsg.create(module.allocator, 0, "Unimplemented: ExportOptions.section", .{}),
+                    );
+                    continue;
                 }
             }
-            const stb_bits = switch (node.data.linkage) {
+            const stb_bits = switch (exp.options.linkage) {
                 .Internal => elf.STB_LOCAL,
                 .Strong => blk: {
-                    if (mem.eql(u8, node.data.name, "_start")) {
-                        self.entry_addr = decl_symbol.vaddr;
+                    if (mem.eql(u8, exp.options.name, "_start")) {
+                        self.entry_addr = decl_sym.st_value;
                     }
                     break :blk elf.STB_GLOBAL;
                 },
                 .Weak => elf.STB_WEAK,
                 .LinkOnce => {
-                    try errors.ensureCapacity(errors.items.len + 1);
-                    errors.appendAssumeCapacity(.{
-                        .byte_offset = 0,
-                        .msg = try std.fmt.allocPrint(errors.allocator, "Unimplemented: GlobalLinkage.LinkOnce", .{}),
-                    });
+                    try module.failed_exports.ensureCapacity(module.failed_exports.size + 1);
+                    module.failed_exports.putAssumeCapacityNoClobber(
+                        exp,
+                        try ir.ErrorMsg.create(module.allocator, 0, "Unimplemented: GlobalLinkage.LinkOnce", .{}),
+                    );
+                    continue;
                 },
             };
-            const stt_bits = switch (typed_value.ty.zigTypeTag()) {
-                .Fn => elf.STT_FUNC,
-                else => elf.STT_OBJECT,
-            };
-            const sym_index = decl_symbol.symbol_indexes[export_index];
-            const name = blk: {
-                if (i < valid_sym_index_len) {
-                    const name_stroff = self.symbols.items[sym_index].st_name;
-                    const existing_name = self.getString(name_stroff);
-                    if (mem.eql(u8, existing_name, node.data.name)) {
-                        break :blk name_stroff;
-                    }
-                }
-                break :blk try self.makeString(node.data.name);
-            };
-            self.symbols.items[sym_index] = .{
-                .st_name = name,
-                .st_info = (stb_bits << 4) | stt_bits,
-                .st_other = 0,
-                .st_shndx = self.text_section_index.?,
-                .st_value = decl_symbol.vaddr,
-                .st_size = code.items.len,
-            };
-        }
+            const stt_bits: u8 = @truncate(u4, decl_sym.st_info);
+            if (exp.link.sym_index) |i| {
+                const sym = &self.symbols.items[i];
+                sym.* = .{
+                    .st_name = try self.updateString(sym.st_name, exp.options.name),
+                    .st_info = (stb_bits << 4) | stt_bits,
+                    .st_other = 0,
+                    .st_shndx = self.text_section_index.?,
+                    .st_value = decl_sym.st_value,
+                    .st_size = decl_sym.st_size,
+                };
+                try self.writeSymbol(i);
+            } else {
+                const name = try self.makeString(exp.options.name);
+                const i = self.symbols.items.len;
+                self.symbols.appendAssumeCapacity(.{
+                    .st_name = name,
+                    .st_info = (stb_bits << 4) | stt_bits,
+                    .st_other = 0,
+                    .st_shndx = self.text_section_index.?,
+                    .st_value = decl_sym.st_value,
+                    .st_size = decl_sym.st_size,
+                });
+                errdefer self.symbols.shrink(self.allocator, self.symbols.items.len - 1);
+                try self.writeSymbol(i);
 
-        try self.file.pwriteAll(code.items, decl_symbol.file_offset);
+                self.symbol_count_dirty = true;
+                exp.link.sym_index = i;
+            }
+        }
     }
 
     fn writeProgHeader(self: *ElfFile, index: usize) !void {
@@ -782,7 +839,48 @@ pub const ElfFile = struct {
         }
     }
 
-    fn writeSymbols(self: *ElfFile) !void {
+    fn writeSymbol(self: *ElfFile, index: usize) !void {
+        const syms_sect = &self.sections.items[self.symtab_section_index.?];
+        // Make sure we are not pointlessly writing symbol data that will have to get relocated
+        // due to running out of space.
+        if (self.symbol_count_dirty) {
+            const sym_size: u64 = switch (self.ptr_width) {
+                .p32 => @sizeOf(elf.Elf32_Sym),
+                .p64 => @sizeOf(elf.Elf64_Sym),
+            };
+            const allocated_size = self.allocatedSize(syms_sect.sh_offset);
+            const needed_size = self.symbols.items.len * sym_size;
+            if (needed_size > allocated_size) {
+                return self.writeAllSymbols();
+            }
+        }
+        const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
+        switch (self.ptr_width) {
+            .p32 => {
+                var sym = [1]elf.Elf32_Sym{
+                    .{
+                        .st_name = self.symbols.items[index].st_name,
+                        .st_value = @intCast(u32, self.symbols.items[index].st_value),
+                        .st_size = @intCast(u32, self.symbols.items[index].st_size),
+                        .st_info = self.symbols.items[index].st_info,
+                        .st_other = self.symbols.items[index].st_other,
+                        .st_shndx = self.symbols.items[index].st_shndx,
+                    },
+                };
+                if (foreign_endian) {
+                    bswapAllFields(elf.Elf32_Sym, &sym[0]);
+                }
+                const off = syms_sect.sh_offset + @sizeOf(elf.Elf32_Sym) * index;
+                try self.file.pwriteAll(mem.sliceAsBytes(sym[0..1]), off);
+            },
+            .p64 => {
+                var sym = [1]elf.Elf64_Sym{self.symbols.items[index]};
+                if (foreign_endian) {
+                    bswapAllFields(elf.Elf64_Sym, &sym[0]);
+                }
+                const off = syms_sect.sh_offset + @sizeOf(elf.Elf64_Sym) * index;
+                try self.file.pwriteAll(mem.sliceAsBytes(sym[0..1]), off);
+            },
+        }
+    }
+
+    fn writeAllSymbols(self: *ElfFile) !void {
         const small_ptr = self.ptr_width == .p32;
         const syms_sect = &self.sections.items[self.symtab_section_index.?];
         const sym_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym);
diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig
index 8ace5a096f..283f0adc74 100644
--- a/src-self-hosted/type.zig
+++ b/src-self-hosted/type.zig
@@ -5,8 +5,7 @@ const Allocator = std.mem.Allocator;
 const Target = std.Target;
 
 /// This is the raw data, with no bookkeeping, no memory awareness, no de-duplication.
-/// It's important for this struct to be small.
-/// It is not copyable since it may contain references to its inner data.
+/// It's important for this type to be small.
 /// Types are not de-duplicated, which helps with multi-threading since it obviates the requirement
 /// of obtaining a lock on a global type table, as well as making the
 /// garbage collection bookkeeping simpler.
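The type.zig and value.zig hunks that follow thread a new `@"null"` type tag through every exhaustive switch: the compiler now models the distinct type of the `null` literal. For readers unfamiliar with this corner of the language, a quick demonstration of what that tag represents (ordinary Zig, not compiler code):

```
const std = @import("std");

test "the type of the null literal" {
    const Null = @TypeOf(null); // the type the new `@"null"` tag models
    const x: Null = null;       // its only value is `null`
    std.testing.expect(@TypeOf(x) == Null);
}
```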
@@ -51,6 +50,7 @@ pub const Type = extern union { .comptime_int => return .ComptimeInt, .comptime_float => return .ComptimeFloat, .noreturn => return .NoReturn, + .@"null" => return .Null, .fn_noreturn_no_args => return .Fn, .fn_naked_noreturn_no_args => return .Fn, @@ -184,6 +184,8 @@ pub const Type = extern union { .noreturn, => return out_stream.writeAll(@tagName(t)), + .@"null" => return out_stream.writeAll("@TypeOf(null)"), + .const_slice_u8 => return out_stream.writeAll("[]const u8"), .fn_noreturn_no_args => return out_stream.writeAll("fn() noreturn"), .fn_naked_noreturn_no_args => return out_stream.writeAll("fn() callconv(.Naked) noreturn"), @@ -246,6 +248,7 @@ pub const Type = extern union { .comptime_int => return Value.initTag(.comptime_int_type), .comptime_float => return Value.initTag(.comptime_float_type), .noreturn => return Value.initTag(.noreturn_type), + .@"null" => return Value.initTag(.null_type), .fn_noreturn_no_args => return Value.initTag(.fn_noreturn_no_args_type), .fn_naked_noreturn_no_args => return Value.initTag(.fn_naked_noreturn_no_args_type), .fn_ccc_void_no_args => return Value.initTag(.fn_ccc_void_no_args_type), @@ -286,6 +289,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .array_u8_sentinel_0, .const_slice_u8, @@ -329,6 +333,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .array_u8_sentinel_0, .single_const_pointer, @@ -372,6 +377,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .array_u8_sentinel_0, .fn_noreturn_no_args, @@ -416,6 +422,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, @@ -458,6 +465,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, @@ -489,6 +497,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, @@ -533,6 +542,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, @@ -606,6 +616,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -650,6 +661,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -693,6 +705,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -736,6 +749,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -779,6 +793,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -833,6 +848,7 @@ pub const Type = extern union { .type, .anyerror, .noreturn, + .@"null", .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, @@ -881,6 +897,7 @@ pub const Type = extern union { .c_void, .void, .noreturn, + .@"null", => return true, .int_unsigned => return 
ty.cast(Payload.IntUnsigned).?.bits == 0, @@ -933,6 +950,7 @@ pub const Type = extern union { .c_void, .void, .noreturn, + .@"null", .int_unsigned, .int_signed, .array, @@ -974,6 +992,7 @@ pub const Type = extern union { comptime_int, comptime_float, noreturn, + @"null", fn_noreturn_no_args, fn_naked_noreturn_no_args, fn_ccc_void_no_args, diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 2a2a8b54bc..1cad9cf129 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -10,7 +10,7 @@ const ir = @import("ir.zig"); /// This is the raw data, with no bookkeeping, no memory awareness, /// no de-duplication, and no type system awareness. -/// It's important for this struct to be small. +/// It's important for this type to be small. /// This union takes advantage of the fact that the first page of memory /// is unmapped, giving us 4096 possible enum tags that have no payload. pub const Value = extern union { @@ -46,6 +46,7 @@ pub const Value = extern union { comptime_int_type, comptime_float_type, noreturn_type, + null_type, fn_noreturn_no_args_type, fn_naked_noreturn_no_args_type, fn_ccc_void_no_args_type, @@ -138,6 +139,7 @@ pub const Value = extern union { .comptime_int_type => return out_stream.writeAll("comptime_int"), .comptime_float_type => return out_stream.writeAll("comptime_float"), .noreturn_type => return out_stream.writeAll("noreturn"), + .null_type => return out_stream.writeAll("@TypeOf(null)"), .fn_noreturn_no_args_type => return out_stream.writeAll("fn() noreturn"), .fn_naked_noreturn_no_args_type => return out_stream.writeAll("fn() callconv(.Naked) noreturn"), .fn_ccc_void_no_args_type => return out_stream.writeAll("fn() callconv(.C) void"), @@ -209,6 +211,7 @@ pub const Value = extern union { .comptime_int_type => Type.initTag(.comptime_int), .comptime_float_type => Type.initTag(.comptime_float), .noreturn_type => Type.initTag(.noreturn), + .null_type => Type.initTag(.@"null"), .fn_noreturn_no_args_type => Type.initTag(.fn_noreturn_no_args), .fn_naked_noreturn_no_args_type => Type.initTag(.fn_naked_noreturn_no_args), .fn_ccc_void_no_args_type => Type.initTag(.fn_ccc_void_no_args), @@ -263,6 +266,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -319,6 +323,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -376,6 +381,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -438,6 +444,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -529,6 +536,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -582,6 +590,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -674,6 +683,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + 
.null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -736,6 +746,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, @@ -812,6 +823,7 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, From fda0eef9fbf2fe73baf09127c8925910dcd35205 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 12 May 2020 16:43:11 -0400 Subject: [PATCH 04/31] clear progress indicator before printing errors --- src/codegen.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/codegen.cpp b/src/codegen.cpp index 366eac4230..9661d615f3 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1794,6 +1794,16 @@ static LLVMValueRef ir_llvm_value(CodeGen *g, IrInstGen *instruction) { } void codegen_report_errors_and_exit(CodeGen *g) { + // Clear progress indicator before printing errors + if (g->sub_progress_node != nullptr) { + stage2_progress_end(g->sub_progress_node); + g->sub_progress_node = nullptr; + } + if (g->main_progress_node != nullptr) { + stage2_progress_end(g->main_progress_node); + g->main_progress_node = nullptr; + } + assert(g->errors.length != 0); for (size_t i = 0; i < g->errors.length; i += 1) { ErrorMsg *err = g->errors.at(i); From e3a0fac1a77a8c637c790670ff749879298becad Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 12 May 2020 20:11:47 -0400 Subject: [PATCH 05/31] self-hosted: link: global offset table support for decls --- src-self-hosted/Package.zig | 1 + src-self-hosted/codegen.zig | 23 ++-- src-self-hosted/ir.zig | 69 ++++++------ src-self-hosted/link.zig | 205 +++++++++++++++++++++++++++--------- 4 files changed, 198 insertions(+), 100 deletions(-) diff --git a/src-self-hosted/Package.zig b/src-self-hosted/Package.zig index eaf37f379d..c70b3b6bd0 100644 --- a/src-self-hosted/Package.zig +++ b/src-self-hosted/Package.zig @@ -50,3 +50,4 @@ pub fn add(self: *Package, name: []const u8, package: *Package) !void { const std = @import("std"); const mem = std.mem; const assert = std.debug.assert; +const Package = @This(); diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 501f8717ea..eee0bc55d2 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -5,13 +5,9 @@ const ir = @import("ir.zig"); const Type = @import("type.zig").Type; const Value = @import("value.zig").Value; const Target = std.Target; +const Allocator = mem.Allocator; -pub fn generateSymbol( - typed_value: ir.TypedValue, - module: ir.Module, - code: *std.ArrayList(u8), - errors: *std.ArrayList(ir.ErrorMsg), -) !void { +pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !?*ir.ErrorMsg { switch (typed_value.ty.zigTypeTag()) { .Fn => { const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; @@ -21,14 +17,14 @@ pub fn generateSymbol( .mod_fn = module_fn, .code = code, .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator), - .errors = errors, + .err_msg = null, }; defer function.inst_table.deinit(); for (module_fn.body.instructions) |inst| { const new_inst = function.genFuncInst(inst) catch |err| switch (err) { error.CodegenFail => { - assert(function.errors.items.len != 0); + assert(function.err_msg != null); break; }, else => |e| return e, @@ -36,7 +32,7 
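The reworked `generateSymbol` below returns `!?*ir.ErrorMsg`: an error return means the compiler machinery itself failed, a non-null payload is a user-facing message, and null is success. One way to consume that shape, sketched with a stand-in payload type rather than the compiler's own (hypothetical function and names):

```
const std = @import("std");

fn consume(result: anyerror!?*u32) ![]const u8 {
    // `try` surfaces machinery errors; the capture unwraps the optional.
    if (try result) |payload| {
        _ = payload;
        return "user-facing failure";
    }
    return "success";
}

test "unwrapping an error-union-of-optional" {
    var msg: u32 = 42;
    std.testing.expect(std.mem.eql(u8, try consume(&msg), "user-facing failure"));
    std.testing.expect(std.mem.eql(u8, try consume(null), "success"));
    std.testing.expectError(error.Boom, consume(error.Boom));
}
```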
@@ pub fn generateSymbol( try function.inst_table.putNoClobber(inst, new_inst); } - return Symbol{ .errors = function.errors.toOwnedSlice() }; + return function.err_msg; }, else => @panic("TODO implement generateSymbol for non-function decls"), } @@ -47,7 +43,7 @@ const Function = struct { mod_fn: *const ir.Module.Fn, code: *std.ArrayList(u8), inst_table: std.AutoHashMap(*ir.Inst, MCValue), - errors: *std.ArrayList(ir.ErrorMsg), + err_msg: ?*ir.ErrorMsg, const MCValue = union(enum) { none, @@ -428,11 +424,8 @@ const Function = struct { fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } { @setCold(true); - try self.errors.ensureCapacity(self.errors.items.len + 1); - self.errors.appendAssumeCapacity(.{ - .byte_offset = src, - .msg = try std.fmt.allocPrint(self.errors.allocator, format, args), - }); + assert(self.err_msg == null); + self.err_msg = try ir.ErrorMsg.create(self.code.allocator, src, format, args); return error.CodegenFail; } }; diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 6deb1a4c8e..52de991441 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -201,7 +201,7 @@ pub const Module = struct { decl_table: std.AutoHashMap(Decl.Hash, *Decl), optimize_mode: std.builtin.Mode, - link_error_flags: link.ElfFile.ErrorFlags = .{}, + link_error_flags: link.ElfFile.ErrorFlags = link.ElfFile.ErrorFlags{}, /// We optimize memory usage for a compilation with no compile errors by storing the /// error messages and mapping outside of `Decl`. @@ -247,7 +247,7 @@ pub const Module = struct { /// The most recent value of the Decl after a successful semantic analysis. /// The tag for this union is determined by the tag value of the analysis field. typed_value: union { - never_succeeded, + never_succeeded: void, most_recent: TypedValue.Managed, }, /// Represents the "shallow" analysis status. For example, for decls that are functions, @@ -278,12 +278,11 @@ pub const Module = struct { complete, }, - /// Represents the position of the code, if any, in the output file. + /// Represents the position of the code in the output file. /// This is populated regardless of semantic analysis and code generation. - /// This value is `undefined` if the type has no runtime bits. - link: link.ElfFile.Decl, + link: link.ElfFile.Decl = link.ElfFile.Decl.empty, - /// The set of other decls whose typed_value could possibly change if this Decl's + /// The shallow set of other decls whose typed_value could possibly change if this Decl's /// typed_value is modified. /// TODO look into using a lightweight map/set data structure rather than a linear array. dependants: ArrayListUnmanaged(*Decl) = .{}, @@ -368,11 +367,11 @@ pub const Module = struct { /// Reference to external memory, not owned by ZIRModule. 
sub_file_path: []const u8, source: union { - unloaded, + unloaded: void, bytes: [:0]const u8, }, contents: union { - not_available, + not_available: void, module: *text.Module, }, status: enum { @@ -575,9 +574,9 @@ pub const Module = struct { try self.failed_files.ensureCapacity(self.failed_files.size + 1); var keep_source = false; - const source = try self.root_pkg_dir.readFileAllocOptions( + const source = try self.root_pkg.root_src_dir.readFileAllocOptions( self.allocator, - self.root_src_path, + self.root_pkg.root_src_path, std.math.maxInt(u32), 1, 0, @@ -628,20 +627,7 @@ pub const Module = struct { while (self.analysis_queue.popOrNull()) |work_item| { switch (work_item) { .decl => |decl| switch (decl.analysis) { - .success => |typed_value| { - var arena = decl.arena.promote(self.allocator); - const update_result = self.bin_file.updateDecl( - self.*, - typed_value, - decl.export_node, - decl.fullyQualifiedNameHash(), - &arena.allocator, - ); - decl.arena = arena.state; - if (try update_result) |err_msg| { - decl.analysis = .{ .codegen_failure = err_msg }; - } - }, + .success => try self.bin_file.updateDecl(self, decl), }, } } @@ -653,22 +639,22 @@ pub const Module = struct { return kv.value; } else { const new_decl = blk: { - var decl_arena = std.heap.ArenaAllocator.init(self.allocator); - errdefer decl_arena.deinit(); - const new_decl = try decl_arena.allocator.create(Decl); - const name = try mem.dupeZ(&decl_arena.allocator, u8, old_inst.name); + try self.decl_table.ensureCapacity(self.decl_table.size + 1); + const new_decl = try self.allocator.create(Decl); + errdefer self.allocator.destroy(new_decl); + const name = try mem.dupeZ(self.allocator, u8, old_inst.name); + errdefer self.allocator.free(name); new_decl.* = .{ - .arena = decl_arena.state, .name = name, - .src = old_inst.src, - .analysis = .in_progress, .scope = scope.findZIRModule(), + .src = old_inst.src, + .typed_value = .{ .never_succeeded = {} }, + .analysis = .initial_in_progress, }; - try self.decl_table.putNoClobber(hash, new_decl); + self.decl_table.putAssumeCapacityNoClobber(hash, new_decl); break :blk new_decl; }; - swapRemoveElem(self.allocator, *Scope.ZIRModule, root_scope, self.failed_decls); var decl_scope: Scope.DeclAnalysis = .{ .base = .{ .parent = scope }, .decl = new_decl, @@ -1838,6 +1824,7 @@ pub fn main() anyerror!void { const bin_path = args[2]; const debug_error_trace = true; const output_zir = true; + const object_format: ?std.builtin.ObjectFormat = null; const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); @@ -1845,9 +1832,9 @@ pub fn main() anyerror!void { .target = native_info.target, .output_mode = .Exe, .link_mode = .Static, - .object_format = options.object_format orelse native_info.target.getObjectFormat(), + .object_format = object_format orelse native_info.target.getObjectFormat(), }); - defer bin_file.deinit(allocator); + defer bin_file.deinit(); var module = blk: { const root_pkg = try Package.create(allocator, std.fs.cwd(), ".", src_path); @@ -1857,7 +1844,9 @@ pub fn main() anyerror!void { errdefer allocator.destroy(root_scope); root_scope.* = .{ .sub_file_path = root_pkg.root_src_path, - .contents = .unloaded, + .source = .{ .unloaded = {} }, + .contents = .{ .not_available = {} }, + .status = .unloaded, }; break :blk Module{ @@ -1866,7 +1855,13 @@ pub fn main() anyerror!void { .root_scope = root_scope, .bin_file = &bin_file, .optimize_mode = .Debug, - .decl_table = std.AutoHashMap(Decl.Hash, *Decl).init(allocator), + .decl_table = 
std.AutoHashMap(Module.Decl.Hash, *Module.Decl).init(allocator), + .decl_exports = std.AutoHashMap(*Module.Decl, []*Module.Export).init(allocator), + .export_owners = std.AutoHashMap(*Module.Decl, []*Module.Export).init(allocator), + .failed_decls = std.AutoHashMap(*Module.Decl, *ErrorMsg).init(allocator), + .failed_fns = std.AutoHashMap(*Module.Fn, *ErrorMsg).init(allocator), + .failed_files = std.AutoHashMap(*Module.Scope.ZIRModule, *ErrorMsg).init(allocator), + .failed_exports = std.AutoHashMap(*Module.Export, *ErrorMsg).init(allocator), }; }; defer module.deinit(); diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index 03b2b61535..865ff609b1 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -94,35 +94,40 @@ pub const ElfFile = struct { /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. /// Same order as in the file. - sections: std.ArrayListUnmanaged(elf.Elf64_Shdr) = .{}, + sections: std.ArrayListUnmanaged(elf.Elf64_Shdr) = std.ArrayListUnmanaged(elf.Elf64_Shdr){}, shdr_table_offset: ?u64 = null, /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. /// Same order as in the file. - program_headers: std.ArrayListUnmanaged(elf.Elf64_Phdr) = .{}, + program_headers: std.ArrayListUnmanaged(elf.Elf64_Phdr) = std.ArrayListUnmanaged(elf.Elf64_Phdr){}, phdr_table_offset: ?u64 = null, /// The index into the program headers of a PT_LOAD program header with Read and Execute flags phdr_load_re_index: ?u16 = null, + /// The index into the program headers of the global offset table. + /// It needs PT_LOAD and Read flags. + phdr_got_index: ?u16 = null, entry_addr: ?u64 = null, - shstrtab: std.ArrayListUnmanaged(u8) = .{}, + shstrtab: std.ArrayListUnmanaged(u8) = std.ArrayListUnmanaged(u8){}, shstrtab_index: ?u16 = null, text_section_index: ?u16 = null, symtab_section_index: ?u16 = null, + got_section_index: ?u16 = null, /// The same order as in the file - symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, + symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = std.ArrayListUnmanaged(elf.Elf64_Sym){}, - /// Same order as in the file. - offset_table: std.ArrayListUnmanaged(aoeu) = .{}, + /// Same order as in the file. The value is the absolute vaddr value. + /// If the vaddr of the executable program header changes, the entire + /// offset table needs to be rewritten. + offset_table: std.ArrayListUnmanaged(u64) = std.ArrayListUnmanaged(u64){}, - /// This means the entire read-only executable program code needs to be rewritten. - phdr_load_re_dirty: bool = false, phdr_table_dirty: bool = false, shdr_table_dirty: bool = false, shstrtab_dirty: bool = false, - symtab_dirty: bool = false, + offset_table_count_dirty: bool = false, + symbol_count_dirty: bool = false, error_flags: ErrorFlags = ErrorFlags{}, @@ -130,18 +135,25 @@ pub const ElfFile = struct { no_entry_point_found: bool = false, }; - /// TODO it's too bad this optional takes up double the memory it should pub const Decl = struct { /// Each decl always gets a local symbol with the fully qualified name. /// The vaddr and size are found here directly. /// The file offset is found by computing the vaddr offset from the section vaddr /// the symbol references, and adding that to the file offset of the section. - local_sym_index: ?usize = null, + /// If this field is 0, it means the codegen size = 0 and there is no symbol or + /// offset table entry. 
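+ /// Using a plain integer with 0 reserved as the null value avoids the
+ /// extra memory that an optional index would cost in every Decl.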
+ local_sym_index: u32, + /// This field is undefined for symbols with size = 0. + offset_table_index: u32, + + pub const empty = Decl{ + .local_sym_index = 0, + .offset_table_index = undefined, + }; }; - /// TODO it's too bad this optional takes up double the memory it should pub const Export = struct { - sym_index: ?usize = null, + sym_index: usize, }; pub fn deinit(self: *ElfFile) void { @@ -250,33 +262,57 @@ pub const ElfFile = struct { .p32 => true, .p64 => false, }; + const ptr_size: u8 = switch (self.ptr_width) { + .p32 => 4, + .p64 => 8, + }; if (self.phdr_load_re_index == null) { self.phdr_load_re_index = @intCast(u16, self.program_headers.items.len); const file_size = self.options.program_code_size_hint; const p_align = 0x1000; const off = self.findFreeSpace(file_size, p_align); //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); - try self.program_headers.append(.{ + try self.program_headers.append(self.allocator, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, .p_vaddr = default_entry_addr, .p_paddr = default_entry_addr, - .p_memsz = 0, + .p_memsz = file_size, .p_align = p_align, .p_flags = elf.PF_X | elf.PF_R, }); self.entry_addr = null; - self.phdr_load_re_dirty = true; + self.phdr_table_dirty = true; + } + if (self.phdr_got_index == null) { + self.phdr_got_index = @intCast(u16, self.program_headers.items.len); + const file_size = @as(u64, ptr_size) * self.options.symbol_count_hint; + const off = self.findFreeSpace(file_size, ptr_size); + //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); + // TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at. + // we'll need to re-use that function anyway, in case the GOT grows and overlaps something + // else in virtual memory. 
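+ // Until that function exists, an arbitrary fixed virtual address is used.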
+ const default_got_addr = 0x80000000; + try self.program_headers.append(self.allocator, .{ + .p_type = elf.PT_LOAD, + .p_offset = off, + .p_filesz = file_size, + .p_vaddr = default_got_addr, + .p_paddr = default_got_addr, + .p_memsz = file_size, + .p_align = ptr_size, + .p_flags = elf.PF_R, + }); self.phdr_table_dirty = true; } if (self.shstrtab_index == null) { self.shstrtab_index = @intCast(u16, self.sections.items.len); assert(self.shstrtab.items.len == 0); - try self.shstrtab.append(0); // need a 0 at position 0 + try self.shstrtab.append(self.allocator, 0); // need a 0 at position 0 const off = self.findFreeSpace(self.shstrtab.items.len, 1); //std.debug.warn("found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len }); - try self.sections.append(.{ + try self.sections.append(self.allocator, .{ .sh_name = try self.makeString(".shstrtab"), .sh_type = elf.SHT_STRTAB, .sh_flags = 0, @@ -295,7 +331,7 @@ pub const ElfFile = struct { self.text_section_index = @intCast(u16, self.sections.items.len); const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; - try self.sections.append(.{ + try self.sections.append(self.allocator, .{ .sh_name = try self.makeString(".text"), .sh_type = elf.SHT_PROGBITS, .sh_flags = elf.SHF_ALLOC | elf.SHF_EXECINSTR, @@ -309,6 +345,24 @@ pub const ElfFile = struct { }); self.shdr_table_dirty = true; } + if (self.got_section_index == null) { + self.got_section_index = @intCast(u16, self.sections.items.len); + const phdr = &self.program_headers.items[self.phdr_got_index.?]; + + try self.sections.append(self.allocator, .{ + .sh_name = try self.makeString(".got"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_ALLOC, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = phdr.p_align, + .sh_entsize = ptr_size, + }); + self.shdr_table_dirty = true; + } if (self.symtab_section_index == null) { self.symtab_section_index = @intCast(u16, self.sections.items.len); const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); @@ -317,7 +371,7 @@ pub const ElfFile = struct { const off = self.findFreeSpace(file_size, min_align); //std.debug.warn("found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); - try self.sections.append(.{ + try self.sections.append(self.allocator, .{ .sh_name = try self.makeString(".symtab"), .sh_type = elf.SHT_SYMTAB, .sh_flags = 0, @@ -330,14 +384,14 @@ pub const ElfFile = struct { .sh_addralign = min_align, .sh_entsize = each_size, }); - self.symtab_dirty = true; self.shdr_table_dirty = true; + try self.writeAllSymbols(); } - const shsize: u64 = switch (ptr_width) { + const shsize: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Shdr), .p64 => @sizeOf(elf.Elf64_Shdr), }; - const shalign: u16 = switch (ptr_width) { + const shalign: u16 = switch (self.ptr_width) { .p32 => @alignOf(elf.Elf32_Shdr), .p64 => @alignOf(elf.Elf64_Shdr), }; @@ -345,11 +399,11 @@ pub const ElfFile = struct { self.shdr_table_offset = self.findFreeSpace(self.sections.items.len * shsize, shalign); self.shdr_table_dirty = true; } - const phsize: u64 = switch (ptr_width) { + const phsize: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Phdr), .p64 => @sizeOf(elf.Elf64_Phdr), }; - const phalign: u16 = switch (ptr_width) { + const phalign: u16 = switch (self.ptr_width) { .p32 => @alignOf(elf.Elf32_Phdr), .p64 => @alignOf(elf.Elf64_Phdr), }; @@ -399,7 +453,7 @@ pub const ElfFile = struct { try 
self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); }, } - self.phdr_table_offset = false; + self.phdr_table_dirty = false; } { @@ -432,11 +486,10 @@ pub const ElfFile = struct { self.shdr_table_offset = self.findFreeSpace(needed_size, phalign); } - const allocator = self.sections.allocator; switch (self.ptr_width) { .p32 => { - const buf = try allocator.alloc(elf.Elf32_Shdr, self.sections.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf32_Shdr, self.sections.items.len); + defer self.allocator.free(buf); for (buf) |*shdr, i| { shdr.* = sectHeaderTo32(self.sections.items[i]); @@ -447,8 +500,8 @@ pub const ElfFile = struct { try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); }, .p64 => { - const buf = try allocator.alloc(elf.Elf64_Shdr, self.sections.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf64_Shdr, self.sections.items.len); + defer self.allocator.free(buf); for (buf) |*shdr, i| { shdr.* = self.sections.items[i]; @@ -460,6 +513,7 @@ pub const ElfFile = struct { try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); }, } + self.shdr_table_dirty = false; } if (self.entry_addr == null and self.options.output_mode == .Exe) { self.error_flags.no_entry_point_found = true; @@ -470,11 +524,11 @@ pub const ElfFile = struct { // TODO find end pos and truncate // The point of flush() is to commit changes, so nothing should be dirty after this. - assert(!self.phdr_load_re_dirty); assert(!self.phdr_table_dirty); assert(!self.shdr_table_dirty); assert(!self.shstrtab_dirty); - assert(!self.symtab_dirty); + assert(!self.symbol_count_dirty); + assert(!self.offset_table_count_dirty); } fn writeElfHeader(self: *ElfFile) !void { @@ -608,6 +662,7 @@ pub const ElfFile = struct { const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; const shdr = &self.sections.items[self.text_section_index.?]; + // TODO Also detect virtual address collisions. const text_capacity = self.allocatedSize(shdr.sh_offset); // TODO instead of looping here, maintain a free list and a pointer to the end. const end_vaddr = blk: { @@ -664,7 +719,7 @@ pub const ElfFile = struct { defer code.deinit(); const typed_value = decl.typed_value.most_recent.typed_value; - const err_msg = try codegen.generateSymbol(typed_value, module, &code, module.allocator); + const err_msg = try codegen.generateSymbol(typed_value, module, &code); if (err_msg != null) |em| { decl.analysis = .codegen_failure; _ = try module.failed_decls.put(decl, em); @@ -678,26 +733,31 @@ pub const ElfFile = struct { else => elf.STT_OBJECT, }; - if (decl.link.local_sym_index) |local_sym_index| { - const local_sym = &self.symbols.items[local_sym_index]; + if (decl.link.local_sym_index != 0) { + const local_sym = &self.symbols.items[decl.link.local_sym_index]; const existing_block = self.findAllocatedTextBlock(local_sym); const file_offset = if (code_size > existing_block.size_capacity) fo: { - const new_block = self.allocateTextBlock(code_size); + const new_block = try self.allocateTextBlock(code_size); local_sym.st_value = new_block.vaddr; local_sym.st_size = code_size; + + try self.writeOffsetTableEntry(decl.link.offset_table_index); + break :fo new_block.file_offset; } else existing_block.file_offset; local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(u8, decl.name)); local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits; // TODO this write could be avoided if no fields of the symbol were changed. 
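// Only this one symbol is patched in place; writeSymbol falls back to a
// full table rewrite only when the symbol table has outgrown its space.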
- try self.writeSymbol(local_sym_index); + try self.writeSymbol(decl.link.local_sym_index); break :blk file_offset; } else { try self.symbols.ensureCapacity(self.symbols.items.len + 1); + try self.offset_table.ensureCapacity(self.offset_table.items.len + 1); const decl_name = mem.spanZ(u8, decl.name); const name_str_index = try self.makeString(decl_name); - const new_block = self.allocateTextBlock(code_size); + const new_block = try self.allocateTextBlock(code_size); const local_sym_index = self.symbols.items.len; + const offset_table_index = self.offset_table.items.len; self.symbols.appendAssumeCapacity(self.allocator, .{ .st_name = name_str_index, @@ -708,10 +768,17 @@ pub const ElfFile = struct { .st_size = code_size, }); errdefer self.symbols.shrink(self.symbols.items.len - 1); + self.offset_table.appendAssumeCapacity(self.allocator, new_block.vaddr); + errdefer self.offset_table.shrink(self.offset_table.items.len - 1); try self.writeSymbol(local_sym_index); + try self.writeOffsetTableEntry(offset_table_index); self.symbol_count_dirty = true; - decl.link.local_sym_index = local_sym_index; + self.offset_table_count_dirty = true; + decl.link = .{ + .local_sym_index = local_sym_index, + .offset_table_index = offset_table_index, + }; break :blk new_block.file_offset; } @@ -839,6 +906,45 @@ pub const ElfFile = struct { } } + fn writeOffsetTableEntry(self: *ElfFile, index: usize) !void { + const shdr = &self.sections.items[self.got_section_index.?]; + const phdr = &self.program_headers.items[self.phdr_got_index.?]; + if (self.offset_table_count_dirty) { + // TODO Also detect virtual address collisions. + const allocated_size = self.allocatedSize(shdr.sh_offset); + const needed_size = self.symbols.items.len * shdr.sh_entsize; + if (needed_size > allocated_size) { + // Must move the entire got section. 
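+ // Find a new file offset with enough room, copy the old GOT bytes there,
+ // and update the section and program headers below.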
+ const new_offset = self.findFreeSpace(needed_size, shdr.sh_entsize); + const amt = try self.file.copyRangeAll(shdr.sh_offset, self.file, new_offset, shdr.sh_size); + if (amt != text_size) return error.InputOutput; + shdr.sh_offset = new_offset; + } + shdr.sh_size = needed_size; + phdr.p_memsz = needed_size; + phdr.p_filesz = needed_size; + + self.shdr_table_dirty = true; // TODO look into making only the one section dirty + self.phdr_table_dirty = true; // TODO look into making only the one program header dirty + + self.offset_table_count_dirty = false; + } + const endian = self.options.target.cpu.arch.endian(); + const off = shdr.sh_offset + shdr.sh_entsize * index; + switch (self.ptr_width) { + .p32 => { + var buf: [4]u8 = undefined; + mem.writeInt(u32, &buf, @intCast(u32, self.offset_table.items[index]), endian); + try self.file.pwriteAll(&buf, off); + }, + .p64 => { + var buf: [8]u8 = undefined; + mem.writeInt(u64, &buf, self.offset_table.items[index], endian); + try self.file.pwriteAll(&buf, off); + }, + } + } + fn writeSymbol(self: *ElfFile, index: usize) !void { const syms_sect = &self.sections.items[self.symtab_section_index.?]; // Make sure we are not pointlessly writing symbol data that will have to get relocated @@ -849,6 +955,9 @@ pub const ElfFile = struct { if (needed_size > allocated_size) { return self.writeAllSymbols(); } + syms_sect.sh_info = @intCast(u32, self.symbols.items.len); + self.shdr_table_dirty = true; // TODO look into only writing one section + self.symbol_count_dirty = false; } const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); switch (self.ptr_width) { @@ -896,12 +1005,13 @@ pub const ElfFile = struct { //std.debug.warn("symtab start=0x{x} end=0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size }); syms_sect.sh_size = needed_size; syms_sect.sh_info = @intCast(u32, self.symbols.items.len); - const allocator = self.symbols.allocator; + self.symbol_count_dirty = false; + self.shdr_table_dirty = true; // TODO look into only writing one section const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); switch (self.ptr_width) { .p32 => { - const buf = try allocator.alloc(elf.Elf32_Sym, self.symbols.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf32_Sym, self.symbols.items.len); + defer self.allocator.free(buf); for (buf) |*sym, i| { sym.* = .{ @@ -919,8 +1029,8 @@ pub const ElfFile = struct { try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); }, .p64 => { - const buf = try allocator.alloc(elf.Elf64_Sym, self.symbols.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf64_Sym, self.symbols.items.len); + defer self.allocator.free(buf); for (buf) |*sym, i| { sym.* = .{ @@ -961,12 +1071,11 @@ pub fn createElfFile(allocator: *Allocator, file: fs.File, options: Options) !El .allocator = allocator, .file = file, .options = options, - .ptr_width = switch (self.options.target.cpu.arch.ptrBitWidth()) { + .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { 32 => .p32, 64 => .p64, else => return error.UnsupportedELFArchitecture, }, - .symtab_dirty = true, .shdr_table_dirty = true, }; errdefer self.deinit(); @@ -1018,7 +1127,7 @@ fn openBinFileInner(allocator: *Allocator, file: fs.File, options: Options) !Elf .allocator = allocator, .file = file, .options = options, - .ptr_width = switch (self.options.target.cpu.arch.ptrBitWidth()) { + .ptr_width = switch 
(options.target.cpu.arch.ptrBitWidth()) { 32 => .p32, 64 => .p64, else => return error.UnsupportedELFArchitecture, From a3da584248c1152c01a1a7f878c164fb19b8e04a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 12 May 2020 23:59:46 -0400 Subject: [PATCH 06/31] self-hosted: ir: implement separated analysis of Decl and Fn --- src-self-hosted/ir.zig | 333 +++++++++++++++++++++--------------- src-self-hosted/ir/text.zig | 22 +-- src-self-hosted/value.zig | 2 +- 3 files changed, 209 insertions(+), 148 deletions(-) diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 52de991441..dd219229f5 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -2,7 +2,6 @@ const std = @import("std"); const mem = std.mem; const Allocator = std.mem.Allocator; const ArrayListUnmanaged = std.ArrayListUnmanaged; -const LinkedList = std.TailQueue; const Value = @import("value.zig").Value; const Type = @import("type.zig").Type; const TypedValue = @import("TypedValue.zig"); @@ -168,17 +167,6 @@ pub const Inst = struct { }; }; -fn swapRemoveElem(allocator: *Allocator, comptime T: type, item: T, list: *ArrayListUnmanaged(T)) void { - var i: usize = 0; - while (i < list.items.len) { - if (list.items[i] == item) { - list.swapRemove(allocator, i); - continue; - } - i += 1; - } -} - pub const Module = struct { /// General-purpose allocator. allocator: *Allocator, @@ -203,6 +191,8 @@ pub const Module = struct { optimize_mode: std.builtin.Mode, link_error_flags: link.ElfFile.ErrorFlags = link.ElfFile.ErrorFlags{}, + work_stack: ArrayListUnmanaged(WorkItem) = ArrayListUnmanaged(WorkItem){}, + /// We optimize memory usage for a compilation with no compile errors by storing the /// error messages and mapping outside of `Decl`. /// The ErrorMsg memory is owned by the decl, using Module's allocator. @@ -218,6 +208,11 @@ pub const Module = struct { /// The ErrorMsg memory is owned by the `Export`, using Module's allocator. failed_exports: std.AutoHashMap(*Export, *ErrorMsg), + pub const WorkItem = union(enum) { + /// Write the machine code for a Decl to the output file. + codegen_decl: *Decl, + }; + pub const Export = struct { options: std.builtin.ExportOptions, /// Byte offset into the file that contains the export directive. @@ -322,11 +317,13 @@ pub const Module = struct { } }; - /// Memory is managed by the arena of the owning Decl. + /// Fn struct memory is owned by the Decl's TypedValue.Managed arena allocator. pub const Fn = struct { + /// This memory owned by the Decl's TypedValue.Managed arena allocator. fn_type: Type, analysis: union(enum) { - queued, + /// The value is the source instruction. + queued: *text.Inst.Fn, in_progress: *Analysis, /// There will be a corresponding ErrorMsg in Module.failed_fns failure, @@ -336,11 +333,14 @@ pub const Module = struct { /// self-hosted supports proper struct types and Zig AST => ZIR. scope: *Scope.ZIRModule, - /// This memory managed by the general purpose allocator. + /// This memory is temporary and points to stack memory for the duration + /// of Fn analysis. pub const Analysis = struct { inner_block: Scope.Block, /// null value means a semantic analysis error happened. inst_table: std.AutoHashMap(*text.Inst, ?*Inst), + /// Owns the memory for instructions + arena: std.heap.ArenaAllocator, }; }; @@ -354,6 +354,26 @@ pub const Module = struct { return @fieldParentPtr(T, "base", base); } + /// Asserts the scope has a parent which is a DeclAnalysis and + /// returns the arena Allocator. 
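+ /// A Block scope forwards to the arena of its enclosing DeclAnalysis, so
+ /// e.g. `try scope.arena().create(Value.Payload.Int_u64)` allocates memory
+ /// that lives as long as the Decl being analyzed.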
+ pub fn arena(self: *Scope) *Allocator { + switch (self.tag) { + .block => return self.cast(Block).?.arena, + .decl => return &self.cast(DeclAnalysis).?.arena.allocator, + .zir_module => unreachable, + } + } + + /// Asserts the scope has a parent which is a DeclAnalysis and + /// returns the Decl. + pub fn decl(self: *Scope) *Decl { + switch (self.tag) { + .block => return self.cast(Block).?.decl, + .decl => return self.cast(DeclAnalysis).?.decl, + .zir_module => unreachable, + } + } + pub const Tag = enum { zir_module, block, @@ -404,7 +424,10 @@ pub const Module = struct { pub const base_tag: Tag = .block; base: Scope = Scope{ .tag = base_tag }, func: *Fn, + decl: *Decl, instructions: ArrayListUnmanaged(*Inst), + /// Points to the arena allocator of DeclAnalysis + arena: *Allocator, }; /// This is a temporary structure, references to it are valid only @@ -413,6 +436,7 @@ pub const Module = struct { pub const base_tag: Tag = .decl; base: Scope = Scope{ .tag = base_tag }, decl: *Decl, + arena: std.heap.ArenaAllocator, }; }; @@ -616,21 +640,62 @@ pub const Module = struct { // Here we ensure enough queue capacity to store all the decls, so that later we can use // appendAssumeCapacity. - try self.analysis_queue.ensureCapacity(self.analysis_queue.items.len + contents.module.decls.len); + try self.work_stack.ensureCapacity( + self.allocator, + self.work_stack.items.len + src_module.decls.len, + ); - for (contents.module.decls) |decl| { + for (src_module.decls) |decl| { if (decl.cast(text.Inst.Export)) |export_inst| { try analyzeExport(self, &root_scope.base, export_inst); } } - while (self.analysis_queue.popOrNull()) |work_item| { - switch (work_item) { - .decl => |decl| switch (decl.analysis) { - .success => try self.bin_file.updateDecl(self, decl), + while (self.work_stack.pop()) |work_item| switch (work_item) { + .codegen_decl => |decl| switch (decl.analysis) { + .success => { + if (decl.typed_value.most_recent.typed_value.val.cast(Value.Function)) |payload| { + switch (payload.func.analysis) { + .queued => self.analyzeFnBody(decl, payload.func) catch |err| switch (err) { + error.AnalysisFail => { + assert(func_payload.func.analysis == .failure); + continue; + }, + else => |e| return e, + }, + .in_progress => unreachable, + .failure => continue, + .success => {}, + } + } + try self.bin_file.updateDecl(self, decl); }, - } - } + }, + }; + } + + fn analyzeFnBody(self: *Module, decl: *Decl, func: *Fn) !void { + // Use the Decl's arena for function memory. 
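+ // The arena is stored demoted as an ArenaAllocator.State; promote it for
+ // the duration of analysis and store the state back when finished.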
+ var arena = decl.typed_value.most_recent.arena.?.promote(self.allocator); + defer decl.typed_value.most_recent.arena.?.* = arena.state; + var analysis: Analysis = .{ + .inner_block = .{ + .func = func, + .decl = decl, + .instructions = .{}, + .arena = &arena.allocator, + }, + .inst_table = std.AutoHashMap(*text.Inst, ?*Inst).init(self.allocator), + }; + defer analysis.inner_block.instructions.deinit(); + defer analysis.inst_table.deinit(); + + const fn_inst = func.analysis.queued; + func.analysis = .{ .in_progress = &analysis }; + + try self.analyzeBody(&analysis.inner_block, fn_inst.positionals.body); + + func.analysis = .{ .success = .{ .instructions = analysis.inner_block.instructions.toOwnedSlice() } }; } fn resolveDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl { @@ -656,19 +721,27 @@ pub const Module = struct { }; var decl_scope: Scope.DeclAnalysis = .{ - .base = .{ .parent = scope }, .decl = new_decl, + .arena = std.heap.ArenaAllocator.init(self.allocator), }; - const typed_value = self.analyzeInstConst(&decl_scope.base, old_inst) catch |err| switch (err) { - error.AnalysisFail => { - assert(new_decl.analysis == .failure); - return error.AnalysisFail; + errdefer decl_scope.arena.deinit(); + + const arena_state = try self.allocator.create(std.heap.ArenaAllocator.State); + errdefer self.allocator.destroy(arena_state); + + const typed_value = try self.analyzeInstConst(&decl_scope.base, old_inst); + + arena_state.* = decl_scope.arena; + + new_decl.typed_value = .{ + .most_recent = .{ + .typed_value = typed_value, + .arena = arena_state, }, - else => |e| return e, }; - new_decl.analysis = .{ .success = typed_value }; + new_decl.analysis = .complete; // We ensureCapacity when scanning for decls. - self.analysis_queue.appendAssumeCapacity(.{ .decl = new_decl }); + self.work_stack.appendAssumeCapacity(self.allocator, .{ .codegen_decl = new_decl }); return new_decl; } } @@ -708,7 +781,7 @@ pub const Module = struct { fn resolveInstConst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!TypedValue { const new_inst = try self.resolveInst(scope, old_inst); - const val = try self.resolveConstValue(new_inst); + const val = try self.resolveConstValue(scope, new_inst); return TypedValue{ .ty = new_inst.ty, .val = val, @@ -716,7 +789,7 @@ pub const Module = struct { } fn resolveConstValue(self: *Module, scope: *Scope, base: *Inst) !Value { - return (try self.resolveDefinedValue(base)) orelse + return (try self.resolveDefinedValue(scope, base)) orelse return self.fail(scope, base.src, "unable to resolve comptime value", .{}); } @@ -734,15 +807,15 @@ pub const Module = struct { const new_inst = try self.resolveInst(scope, old_inst); const wanted_type = Type.initTag(.const_slice_u8); const coerced_inst = try self.coerce(scope, wanted_type, new_inst); - const val = try self.resolveConstValue(coerced_inst); - return val.toAllocatedBytes(&self.arena.allocator); + const val = try self.resolveConstValue(scope, coerced_inst); + return val.toAllocatedBytes(scope.arena()); } fn resolveType(self: *Module, scope: *Scope, old_inst: *text.Inst) !Type { const new_inst = try self.resolveInst(scope, old_inst); const wanted_type = Type.initTag(.@"type"); const coerced_inst = try self.coerce(scope, wanted_type, new_inst); - const val = try self.resolveConstValue(coerced_inst); + const val = try self.resolveConstValue(scope, coerced_inst); return val.toType(); } @@ -764,7 +837,7 @@ pub const Module = struct { const new_export = try self.allocator.create(Export); errdefer 
self.allocator.destroy(new_export); - const owner_decl = scope.getDecl(); + const owner_decl = scope.decl(); new_export.* = .{ .options = .{ .data = .{ .name = symbol_name } }, @@ -810,7 +883,7 @@ pub const Module = struct { } fn addNewInst(self: *Module, block: *Scope.Block, src: usize, ty: Type, comptime T: type) !*T { - const inst = try self.arena.allocator.create(T); + const inst = try block.arena.create(T); inst.* = .{ .base = .{ .tag = T.base_tag, @@ -823,8 +896,8 @@ pub const Module = struct { return inst; } - fn constInst(self: *Module, src: usize, typed_value: TypedValue) !*Inst { - const const_inst = try self.arena.allocator.create(Inst.Constant); + fn constInst(self: *Module, scope: *Scope, src: usize, typed_value: TypedValue) !*Inst { + const const_inst = try scope.arena().create(Inst.Constant); const_inst.* = .{ .base = .{ .tag = Inst.Constant.base_tag, @@ -836,71 +909,71 @@ pub const Module = struct { return &const_inst.base; } - fn constStr(self: *Module, src: usize, str: []const u8) !*Inst { - const array_payload = try self.arena.allocator.create(Type.Payload.Array_u8_Sentinel0); + fn constStr(self: *Module, scope: *Scope, src: usize, str: []const u8) !*Inst { + const array_payload = try scope.arena().create(Type.Payload.Array_u8_Sentinel0); array_payload.* = .{ .len = str.len }; - const ty_payload = try self.arena.allocator.create(Type.Payload.SingleConstPointer); + const ty_payload = try scope.arena().create(Type.Payload.SingleConstPointer); ty_payload.* = .{ .pointee_type = Type.initPayload(&array_payload.base) }; - const bytes_payload = try self.arena.allocator.create(Value.Payload.Bytes); + const bytes_payload = try scope.arena().create(Value.Payload.Bytes); bytes_payload.* = .{ .data = str }; - return self.constInst(src, .{ + return self.constInst(scope, src, .{ .ty = Type.initPayload(&ty_payload.base), .val = Value.initPayload(&bytes_payload.base), }); } - fn constType(self: *Module, src: usize, ty: Type) !*Inst { - return self.constInst(src, .{ + fn constType(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { + return self.constInst(scope, src, .{ .ty = Type.initTag(.type), - .val = try ty.toValue(&self.arena.allocator), + .val = try ty.toValue(scope.arena()), }); } - fn constVoid(self: *Module, src: usize) !*Inst { - return self.constInst(src, .{ + fn constVoid(self: *Module, scope: *Scope, src: usize) !*Inst { + return self.constInst(scope, src, .{ .ty = Type.initTag(.void), .val = Value.initTag(.the_one_possible_value), }); } - fn constUndef(self: *Module, src: usize, ty: Type) !*Inst { - return self.constInst(src, .{ + fn constUndef(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { + return self.constInst(scope, src, .{ .ty = ty, .val = Value.initTag(.undef), }); } - fn constBool(self: *Module, src: usize, v: bool) !*Inst { - return self.constInst(src, .{ + fn constBool(self: *Module, scope: *Scope, src: usize, v: bool) !*Inst { + return self.constInst(scope, src, .{ .ty = Type.initTag(.bool), .val = ([2]Value{ Value.initTag(.bool_false), Value.initTag(.bool_true) })[@boolToInt(v)], }); } - fn constIntUnsigned(self: *Module, src: usize, ty: Type, int: u64) !*Inst { - const int_payload = try self.arena.allocator.create(Value.Payload.Int_u64); + fn constIntUnsigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: u64) !*Inst { + const int_payload = try scope.arena().create(Value.Payload.Int_u64); int_payload.* = .{ .int = int }; - return self.constInst(src, .{ + return self.constInst(scope, src, .{ .ty = ty, .val = 
Value.initPayload(&int_payload.base), }); } - fn constIntSigned(self: *Module, src: usize, ty: Type, int: i64) !*Inst { - const int_payload = try self.arena.allocator.create(Value.Payload.Int_i64); + fn constIntSigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: i64) !*Inst { + const int_payload = try scope.arena().create(Value.Payload.Int_i64); int_payload.* = .{ .int = int }; - return self.constInst(src, .{ + return self.constInst(scope, src, .{ .ty = ty, .val = Value.initPayload(&int_payload.base), }); } - fn constIntBig(self: *Module, src: usize, ty: Type, big_int: BigIntConst) !*Inst { + fn constIntBig(self: *Module, scope: *Scope, src: usize, ty: Type, big_int: BigIntConst) !*Inst { const val_payload = if (big_int.positive) blk: { if (big_int.to(u64)) |x| { return self.constIntUnsigned(src, ty, x); @@ -908,7 +981,7 @@ pub const Module = struct { error.NegativeIntoUnsigned => unreachable, error.TargetTooSmall => {}, // handled below } - const big_int_payload = try self.arena.allocator.create(Value.Payload.IntBigPositive); + const big_int_payload = try scope.arena().create(Value.Payload.IntBigPositive); big_int_payload.* = .{ .limbs = big_int.limbs }; break :blk &big_int_payload.base; } else blk: { @@ -918,12 +991,12 @@ pub const Module = struct { error.NegativeIntoUnsigned => unreachable, error.TargetTooSmall => {}, // handled below } - const big_int_payload = try self.arena.allocator.create(Value.Payload.IntBigNegative); + const big_int_payload = try scope.arena().create(Value.Payload.IntBigNegative); big_int_payload.* = .{ .limbs = big_int.limbs }; break :blk &big_int_payload.base; }; - return self.constInst(src, .{ + return self.constInst(scope, src, .{ .ty = ty, .val = Value.initPayload(val_payload), }); @@ -958,11 +1031,10 @@ pub const Module = struct { .@"asm" => return self.analyzeInstAsm(scope, old_inst.cast(text.Inst.Asm).?), .@"unreachable" => return self.analyzeInstUnreachable(scope, old_inst.cast(text.Inst.Unreachable).?), .@"return" => return self.analyzeInstRet(scope, old_inst.cast(text.Inst.Return).?), - // TODO postpone function analysis until later .@"fn" => return self.analyzeInstFn(scope, old_inst.cast(text.Inst.Fn).?), .@"export" => { try self.analyzeExport(scope, old_inst.cast(text.Inst.Export).?); - return self.constVoid(old_inst.src); + return self.constVoid(scope, old_inst.src); }, .primitive => return self.analyzeInstPrimitive(old_inst.cast(text.Inst.Primitive).?), .fntype => return self.analyzeInstFnType(scope, old_inst.cast(text.Inst.FnType).?), @@ -1033,7 +1105,7 @@ pub const Module = struct { defer self.allocator.free(fn_param_types); func.ty.fnParamTypes(fn_param_types); - const casted_args = try self.arena.allocator.alloc(*Inst, fn_params_len); + const casted_args = try scope.arena().alloc(*Inst, fn_params_len); for (inst.positionals.args) |src_arg, i| { const uncasted_arg = try self.resolveInst(scope, src_arg); casted_args[i] = try self.coerce(scope, fn_param_types[i], uncasted_arg); @@ -1048,36 +1120,15 @@ pub const Module = struct { fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *text.Inst.Fn) InnerError!*Inst { const fn_type = try self.resolveType(scope, fn_inst.positionals.fn_type); - - var new_func: Fn = .{ - .fn_index = self.fns.items.len, - .inner_block = .{ - .func = undefined, - .instructions = .{}, - }, - .inst_table = std.AutoHashMap(*text.Inst, ?*Inst).init(self.allocator), - }; - new_func.inner_block.func = &new_func; - defer new_func.inner_block.instructions.deinit(); - defer new_func.inst_table.deinit(); - // Don't 
hang on to a reference to this when analyzing body instructions, since the memory - // could become invalid. - (try self.fns.addOne(self.allocator)).* = .{ - .analysis_status = .in_progress, + const new_func = try scope.arena().create(Fn); + new_func.* = .{ .fn_type = fn_type, - .body = undefined, + .analysis = .{ .queued = fn_inst.positionals.body }, + .scope = scope.namespace(), }; - - try self.analyzeBody(&new_func.inner_block, fn_inst.positionals.body); - - const f = &self.fns.items[new_func.fn_index]; - f.analysis_status = .success; - f.body = .{ .instructions = new_func.inner_block.instructions.toOwnedSlice() }; - - const fn_payload = try self.arena.allocator.create(Value.Payload.Function); - fn_payload.* = .{ .index = new_func.fn_index }; - - return self.constInst(fn_inst.base.src, .{ + const fn_payload = try scope.arena().create(Value.Payload.Function); + fn_payload.* = .{ .func = new_func }; + return self.constInst(scope, fn_inst.base.src, .{ .ty = fn_type, .val = Value.initPayload(&fn_payload.base), }); @@ -1142,13 +1193,13 @@ pub const Module = struct { switch (elem_ty.zigTypeTag()) { .Array => { if (mem.eql(u8, field_name, "len")) { - const len_payload = try self.arena.allocator.create(Value.Payload.Int_u64); + const len_payload = try scope.arena().create(Value.Payload.Int_u64); len_payload.* = .{ .int = elem_ty.arrayLen() }; - const ref_payload = try self.arena.allocator.create(Value.Payload.RefVal); + const ref_payload = try scope.arena().create(Value.Payload.RefVal); ref_payload.* = .{ .val = Value.initPayload(&len_payload.base) }; - return self.constInst(fieldptr.base.src, .{ + return self.constInst(scope, fieldptr.base.src, .{ .ty = Type.initTag(.single_const_pointer_to_comptime_int), .val = Value.initPayload(&ref_payload.base), }); @@ -1217,12 +1268,12 @@ pub const Module = struct { const index_u64 = index_val.toUnsignedInt(); // @intCast here because it would have been impossible to construct a value that // required a larger index. 
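// The result is a comptime constant: a single-const pointer to the
// array's element type, pointing at the selected element.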
- const elem_ptr = try array_ptr_val.elemPtr(&self.arena.allocator, @intCast(usize, index_u64)); + const elem_ptr = try array_ptr_val.elemPtr(scope.arena(), @intCast(usize, index_u64)); - const type_payload = try self.arena.allocator.create(Type.Payload.SingleConstPointer); + const type_payload = try scope.arena().create(Type.Payload.SingleConstPointer); type_payload.* = .{ .pointee_type = array_ptr.ty.elemType().elemType() }; - return self.constInst(inst.base.src, .{ + return self.constInst(scope, inst.base.src, .{ .ty = Type.initPayload(&type_payload.base), .val = elem_ptr, }); @@ -1246,7 +1297,7 @@ pub const Module = struct { var rhs_space: Value.BigIntSpace = undefined; const lhs_bigint = lhs_val.toBigInt(&lhs_space); const rhs_bigint = rhs_val.toBigInt(&rhs_space); - const limbs = try self.arena.allocator.alloc( + const limbs = try scope.arena().alloc( std.math.big.Limb, std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len) + 1, ); @@ -1259,16 +1310,16 @@ pub const Module = struct { } const val_payload = if (result_bigint.positive) blk: { - const val_payload = try self.arena.allocator.create(Value.Payload.IntBigPositive); + const val_payload = try scope.arena().create(Value.Payload.IntBigPositive); val_payload.* = .{ .limbs = result_limbs }; break :blk &val_payload.base; } else blk: { - const val_payload = try self.arena.allocator.create(Value.Payload.IntBigNegative); + const val_payload = try scope.arena().create(Value.Payload.IntBigNegative); val_payload.* = .{ .limbs = result_limbs }; break :blk &val_payload.base; }; - return self.constInst(inst.base.src, .{ + return self.constInst(scope, inst.base.src, .{ .ty = lhs.ty, .val = Value.initPayload(val_payload), }); @@ -1286,7 +1337,7 @@ pub const Module = struct { else => return self.fail(scope, deref.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}), }; if (ptr.value()) |val| { - return self.constInst(deref.base.src, .{ + return self.constInst(scope, deref.base.src, .{ .ty = elem_ty, .val = val.pointerDeref(), }); @@ -1300,9 +1351,9 @@ pub const Module = struct { const asm_source = try self.resolveConstString(scope, assembly.positionals.asm_source); const output = if (assembly.kw_args.output) |o| try self.resolveConstString(scope, o) else null; - const inputs = try self.arena.allocator.alloc([]const u8, assembly.kw_args.inputs.len); - const clobbers = try self.arena.allocator.alloc([]const u8, assembly.kw_args.clobbers.len); - const args = try self.arena.allocator.alloc(*Inst, assembly.kw_args.args.len); + const inputs = try scope.arena().alloc([]const u8, assembly.kw_args.inputs.len); + const clobbers = try scope.arena().alloc([]const u8, assembly.kw_args.clobbers.len); + const args = try scope.arena().alloc(*Inst, assembly.kw_args.args.len); for (inputs) |*elem, i| { elem.* = try self.resolveConstString(scope, assembly.kw_args.inputs[i]); @@ -1408,15 +1459,16 @@ pub const Module = struct { const uncasted_cond = try self.resolveInst(scope, inst.positionals.condition); const cond = try self.coerce(scope, Type.initTag(.bool), uncasted_cond); - if (try self.resolveDefinedValue(cond)) |cond_val| { + if (try self.resolveDefinedValue(scope, cond)) |cond_val| { const body = if (cond_val.toBool()) &inst.positionals.true_body else &inst.positionals.false_body; try self.analyzeBody(scope, body.*); - return self.constVoid(inst.base.src); + return self.constVoid(scope, inst.base.src); } const parent_block = try self.requireRuntimeBlock(scope, inst.base.src); var true_block: Scope.Block = .{ + .base = .{ .parent = scope }, .func 
= parent_block.func, .instructions = .{}, }; @@ -1424,6 +1476,7 @@ pub const Module = struct { try self.analyzeBody(&true_block.base, inst.positionals.true_body); var false_block: Scope.Block = .{ + .base = .{ .parent = scope }, .func = parent_block.func, .instructions = .{}, }; @@ -1431,8 +1484,8 @@ pub const Module = struct { try self.analyzeBody(&false_block.base, inst.positionals.false_body); // Copy the instruction pointers to the arena memory - const true_instructions = try self.arena.allocator.alloc(*Inst, true_block.instructions.items.len); - const false_instructions = try self.arena.allocator.alloc(*Inst, false_block.instructions.items.len); + const true_instructions = try scope.arena().alloc(*Inst, true_block.instructions.items.len); + const false_instructions = try scope.arena().alloc(*Inst, false_block.instructions.items.len); mem.copy(*Inst, true_instructions, true_block.instructions.items); mem.copy(*Inst, false_instructions, false_block.instructions.items); @@ -1586,7 +1639,7 @@ pub const Module = struct { var lhs_bits: usize = undefined; if (lhs.value()) |lhs_val| { if (lhs_val.isUndef()) - return self.constUndef(src, Type.initTag(.bool)); + return self.constUndef(scope, src, Type.initTag(.bool)); const is_unsigned = if (lhs_is_float) x: { var bigint_space: Value.BigIntSpace = undefined; var bigint = try lhs_val.toBigInt(&bigint_space).toManaged(self.allocator); @@ -1621,7 +1674,7 @@ pub const Module = struct { var rhs_bits: usize = undefined; if (rhs.value()) |rhs_val| { if (rhs_val.isUndef()) - return self.constUndef(src, Type.initTag(.bool)); + return self.constUndef(scope, src, Type.initTag(.bool)); const is_unsigned = if (rhs_is_float) x: { var bigint_space: Value.BigIntSpace = undefined; var bigint = try rhs_val.toBigInt(&bigint_space).toManaged(self.allocator); @@ -1670,13 +1723,13 @@ pub const Module = struct { }); } - fn makeIntType(self: *Module, signed: bool, bits: u16) !Type { + fn makeIntType(self: *Module, scope: *Scope, signed: bool, bits: u16) !Type { if (signed) { - const int_payload = try self.arena.allocator.create(Type.Payload.IntSigned); + const int_payload = try scope.arena().create(Type.Payload.IntSigned); int_payload.* = .{ .bits = bits }; return Type.initPayload(&int_payload.base); } else { - const int_payload = try self.arena.allocator.create(Type.Payload.IntUnsigned); + const int_payload = try scope.arena().create(Type.Payload.IntUnsigned); int_payload.* = .{ .bits = bits }; return Type.initPayload(&int_payload.base); } @@ -1701,7 +1754,7 @@ pub const Module = struct { if (array_type.zigTypeTag() == .Array and coerceInMemoryAllowed(dst_elem_type, array_type.elemType()) == .ok) { - return self.coerceArrayPtrToSlice(dest_type, inst); + return self.coerceArrayPtrToSlice(scope, dest_type, inst); } } @@ -1712,7 +1765,7 @@ pub const Module = struct { if (!val.intFitsInType(dest_type, self.target())) { return self.fail(scope, inst.src, "type {} cannot represent integer value {}", .{ inst.ty, val }); } - return self.constInst(inst.src, .{ .ty = dest_type, .val = val }); + return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); } // integer widening @@ -1721,7 +1774,7 @@ pub const Module = struct { const dst_info = dest_type.intInfo(self.target()); if (src_info.signed == dst_info.signed and dst_info.bits >= src_info.bits) { if (inst.value()) |val| { - return self.constInst(inst.src, .{ .ty = dest_type, .val = val }); + return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); } else { return self.fail(scope, inst.src, 
"TODO implement runtime integer widening", .{}); } @@ -1736,33 +1789,41 @@ pub const Module = struct { fn bitcast(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst { if (inst.value()) |val| { // Keep the comptime Value representation; take the new type. - return self.constInst(inst.src, .{ .ty = dest_type, .val = val }); + return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); } // TODO validate the type size and other compile errors const b = try self.requireRuntimeBlock(scope, inst.src); return self.addNewInstArgs(b, inst.src, dest_type, Inst.BitCast, Inst.Args(Inst.BitCast){ .operand = inst }); } - fn coerceArrayPtrToSlice(self: *Module, dest_type: Type, inst: *Inst) !*Inst { + fn coerceArrayPtrToSlice(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst { if (inst.value()) |val| { // The comptime Value representation is compatible with both types. - return self.constInst(inst.src, .{ .ty = dest_type, .val = val }); + return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); } return self.fail(scope, inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{}); } fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError { @setCold(true); - const err_msg = ErrorMsg{ - .byte_offset = src, - .msg = try std.fmt.allocPrint(self.allocator, format, args), - }; - if (scope.cast(Scope.Block)) |block| { - block.func.analysis = .{ .failure = err_msg }; - } else if (scope.cast(Scope.Decl)) |scope_decl| { - scope_decl.decl.analysis = .{ .failure = err_msg }; - } else { - unreachable; + try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + try self.failed_fns.ensureCapacity(self.failed_fns.size + 1); + const err_msg = try ErrorMsg.create(self.allocator, src, format, args); + switch (scope.tag) { + .decl => { + const decl = scope.cast(Scope.DeclAnalysis).?.decl; + switch (decl.analysis) { + .initial_in_progress => decl.analysis = .initial_sema_failure, + .repeat_in_progress => decl.analysis = .repeat_sema_failure, + else => unreachable, + } + self.failed_decls.putAssumeCapacityNoClobber(decl, err_msg); + }, + .block => { + const func = scope.cast(Scope.Block).?.func; + func.analysis = .failure; + self.failed_fns.putAssumeCapacityNoClobber(func, err_msg); + }, } return error.AnalysisFail; } @@ -1788,8 +1849,8 @@ pub const ErrorMsg = struct { pub fn create(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !*ErrorMsg { const self = try allocator.create(ErrorMsg); - errdefer allocator.destroy(ErrorMsg); - self.* = init(allocator, byte_offset, format, args); + errdefer allocator.destroy(self); + self.* = try init(allocator, byte_offset, format, args); return self; } diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index d87fef7a2d..74fe57bcf2 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -631,7 +631,7 @@ const Parser = struct { if (try body_context.name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); } - try body_context.instructions.append(inst); + try body_context.instructions.append(self.allocator, inst); continue; }, ' ', '\n' => continue, @@ -717,7 +717,7 @@ const Parser = struct { if (try self.global_name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); } - try self.decls.append(inst); + try self.decls.append(self.allocator, inst); }, ' ', '\n' => self.i += 1, 0 => break, @@ -885,7 
+885,7 @@ const Parser = struct { var instructions = std.ArrayList(*Inst).init(&self.arena.allocator); while (true) { skipSpace(self); - try instructions.append(try parseParameterInst(self, body_ctx)); + try instructions.append(self.allocator, try parseParameterInst(self, body_ctx)); skipSpace(self); if (!eatByte(self, ',')) break; } @@ -991,7 +991,7 @@ const EmitZIR = struct { }, .kw_args = .{}, }; - try self.decls.append(&export_inst.base); + try self.decls.append(self.allocator, &export_inst.base); } } @@ -1018,7 +1018,7 @@ const EmitZIR = struct { }, .kw_args = .{}, }; - try self.decls.append(&int_inst.base); + try self.decls.append(self.allocator, &int_inst.base); return &int_inst.base; } @@ -1051,7 +1051,7 @@ const EmitZIR = struct { }, .kw_args = .{}, }; - try self.decls.append(&as_inst.base); + try self.decls.append(self.allocator, &as_inst.base); return &as_inst.base; }, @@ -1085,7 +1085,7 @@ const EmitZIR = struct { }, .kw_args = .{}, }; - try self.decls.append(&fn_inst.base); + try self.decls.append(self.allocator, &fn_inst.base); return &fn_inst.base; }, else => |t| std.debug.panic("TODO implement emitTypedValue for {}", .{@tagName(t)}), @@ -1258,7 +1258,7 @@ const EmitZIR = struct { break :blk &new_inst.base; }, }; - try instructions.append(new_inst); + try instructions.append(self.allocator, new_inst); try inst_table.putNoClobber(inst, new_inst); } } @@ -1310,7 +1310,7 @@ const EmitZIR = struct { .cc = ty.fnCallingConvention(), }, }; - try self.decls.append(&fntype_inst.base); + try self.decls.append(self.allocator, &fntype_inst.base); return &fntype_inst.base; }, else => std.debug.panic("TODO implement emitType for {}", .{ty}), @@ -1327,7 +1327,7 @@ const EmitZIR = struct { }, .kw_args = .{}, }; - try self.decls.append(&primitive_inst.base); + try self.decls.append(self.allocator, &primitive_inst.base); return &primitive_inst.base; } @@ -1340,7 +1340,7 @@ const EmitZIR = struct { }, .kw_args = .{}, }; - try self.decls.append(&str_inst.base); + try self.decls.append(self.allocator, &str_inst.base); return &str_inst.base; } }; diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 1cad9cf129..387df6fe1b 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -160,7 +160,7 @@ pub const Value = extern union { .function => return out_stream.writeAll("(function)"), .decl_ref => return out_stream.writeAll("(decl ref)"), .elem_ptr => { - const elem_ptr = val.cast(Payload.Int_u64).?; + const elem_ptr = val.cast(Payload.ElemPtr).?; try out_stream.print("&[{}] ", .{elem_ptr.index}); val = elem_ptr.array_ptr; }, From 080022f6c670b0f74c39fe01096ebdbaafeda1b2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 13 May 2020 20:06:01 -0400 Subject: [PATCH 07/31] self-hosted: fix compile errors, except for codegen.zig --- lib/std/array_list.zig | 9 +- lib/std/mem.zig | 28 ++- src-self-hosted/TypedValue.zig | 2 +- src-self-hosted/codegen.zig | 3 +- src-self-hosted/ir.zig | 430 +++++++++++++++++++++------------ src-self-hosted/ir/text.zig | 186 ++++++++++---- src-self-hosted/link.zig | 90 ++++--- src-self-hosted/value.zig | 41 +++- 8 files changed, 525 insertions(+), 264 deletions(-) diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig index d38bcab1f4..b9dea8a12f 100644 --- a/lib/std/array_list.zig +++ b/lib/std/array_list.zig @@ -269,13 +269,6 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type { /// Bring-your-own allocator with every function call. 
/// Initialize directly and deinitialize with `deinit` or use `toOwnedSlice`. -pub fn init() Self { - return .{ - .items = &[_]T{}, - .capacity = 0, - }; -} - pub fn ArrayListUnmanaged(comptime T: type) type { return ArrayListAlignedUnmanaged(T, null); } @@ -317,7 +310,7 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ /// The caller owns the returned memory. ArrayList becomes empty. pub fn toOwnedSlice(self: *Self, allocator: *Allocator) Slice { const result = allocator.shrink(self.allocatedSlice(), self.items.len); - self.* = init(allocator); + self.* = Self{}; return result; } diff --git a/lib/std/mem.zig b/lib/std/mem.zig index a40334e587..0b5a6adfd9 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -279,6 +279,21 @@ pub const Allocator = struct { const shrink_result = self.shrinkFn(self, non_const_ptr[0..bytes_len], Slice.alignment, 0, 1); assert(shrink_result.len == 0); } + + /// Copies `m` to newly allocated memory. Caller owns the memory. + pub fn dupe(allocator: *Allocator, comptime T: type, m: []const T) ![]T { + const new_buf = try allocator.alloc(T, m.len); + copy(T, new_buf, m); + return new_buf; + } + + /// Copies `m` to newly allocated memory, with a null-terminated element. Caller owns the memory. + pub fn dupeZ(allocator: *Allocator, comptime T: type, m: []const T) ![:0]T { + const new_buf = try allocator.alloc(T, m.len + 1); + copy(T, new_buf, m); + new_buf[m.len] = 0; + return new_buf[0..m.len :0]; + } }; /// Copy all of source into dest at position 0. @@ -762,19 +777,14 @@ pub fn allEqual(comptime T: type, slice: []const T, scalar: T) bool { return true; } -/// Copies `m` to newly allocated memory. Caller owns the memory. +/// Deprecated, use `Allocator.dupe`. pub fn dupe(allocator: *Allocator, comptime T: type, m: []const T) ![]T { - const new_buf = try allocator.alloc(T, m.len); - copy(T, new_buf, m); - return new_buf; + return allocator.dupe(T, m); } -/// Copies `m` to newly allocated memory, with a null-terminated element. Caller owns the memory. +/// Deprecated, use `Allocator.dupeZ`. pub fn dupeZ(allocator: *Allocator, comptime T: type, m: []const T) ![:0]T { - const new_buf = try allocator.alloc(T, m.len + 1); - copy(T, new_buf, m); - new_buf[m.len] = 0; - return new_buf[0..m.len :0]; + return allocator.dupeZ(T, m); } /// Remove values from the beginning of a slice. diff --git a/src-self-hosted/TypedValue.zig b/src-self-hosted/TypedValue.zig index 0651ca9ec9..83a8f3c09f 100644 --- a/src-self-hosted/TypedValue.zig +++ b/src-self-hosted/TypedValue.zig @@ -16,7 +16,7 @@ pub const Managed = struct { /// If this is `null` then there is no memory management needed. 
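/// When non-null, the full ArenaAllocator can be recovered with `promote`,
/// which is how `deinit` frees the memory.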
arena: ?*std.heap.ArenaAllocator.State = null, - pub fn deinit(self: *ManagedTypedValue, allocator: *Allocator) void { + pub fn deinit(self: *Managed, allocator: *Allocator) void { if (self.arena) |a| a.promote(allocator).deinit(); self.* = undefined; } diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index eee0bc55d2..034ff9ffaa 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -4,10 +4,11 @@ const assert = std.debug.assert; const ir = @import("ir.zig"); const Type = @import("type.zig").Type; const Value = @import("value.zig").Value; +const TypedValue = @import("TypedValue.zig"); const Target = std.Target; const Allocator = mem.Allocator; -pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !?*ir.ErrorMsg { +pub fn generateSymbol(typed_value: TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !?*ir.ErrorMsg { switch (typed_value.ty.zigTypeTag()) { .Fn => { const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index dd219229f5..d7a2228f74 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -196,11 +196,9 @@ pub const Module = struct { /// We optimize memory usage for a compilation with no compile errors by storing the /// error messages and mapping outside of `Decl`. /// The ErrorMsg memory is owned by the decl, using Module's allocator. + /// Note that a Decl can succeed but the Fn it represents can fail. In this case, + /// a Decl can have a failed_decls entry but have analysis status of success. failed_decls: std.AutoHashMap(*Decl, *ErrorMsg), - /// We optimize memory usage for a compilation with no compile errors by storing the - /// error messages and mapping outside of `Fn`. - /// The ErrorMsg memory is owned by the `Fn`, using Module's allocator. - failed_fns: std.AutoHashMap(*Fn, *ErrorMsg), /// Using a map here for consistency with the other fields here. /// The ErrorMsg memory is owned by the `Scope.ZIRModule`, using Module's allocator. failed_files: std.AutoHashMap(*Scope.ZIRModule, *ErrorMsg), @@ -221,7 +219,14 @@ pub const Module = struct { link: link.ElfFile.Export, /// The Decl that performs the export. Note that this is *not* the Decl being exported. owner_decl: *Decl, - status: enum { in_progress, failed, complete }, + status: enum { + in_progress, + failed, + /// Indicates that the failure was due to a temporary issue, such as an I/O error + /// when writing to the output file. Retrying the export may succeed. + failed_retryable, + complete, + }, }; pub const Decl = struct { @@ -260,6 +265,11 @@ pub const Module = struct { /// In this case the `typed_value.most_recent` can still be accessed. /// There will be a corresponding ErrorMsg in Module.failed_decls. codegen_failure, + /// In this case the `typed_value.most_recent` can still be accessed. + /// There will be a corresponding ErrorMsg in Module.failed_decls. + /// This indicates the failure was something like running out of disk space, + /// and attempting codegen again may succeed. + codegen_failure_retryable, /// This Decl might be OK but it depends on another one which did not successfully complete /// semantic analysis. There is a most recent value available. repeat_dependency_failure, @@ -280,40 +290,63 @@ pub const Module = struct { /// The shallow set of other decls whose typed_value could possibly change if this Decl's /// typed_value is modified. 
/// TODO look into using a lightweight map/set data structure rather than a linear array. - dependants: ArrayListUnmanaged(*Decl) = .{}, - - pub fn typedValue(self: Decl) ?TypedValue { - switch (self.analysis) { - .initial_in_progress, - .initial_dependency_failure, - .initial_sema_failure, - => return null, - .codegen_failure, - .repeat_dependency_failure, - .repeat_sema_failure, - .repeat_in_progress, - .complete, - => return self.typed_value.most_recent, - } - } + dependants: ArrayListUnmanaged(*Decl) = ArrayListUnmanaged(*Decl){}, pub fn destroy(self: *Decl, allocator: *Allocator) void { - allocator.free(mem.spanZ(u8, self.name)); - if (self.typedValue()) |tv| tv.deinit(allocator); + allocator.free(mem.spanZ(self.name)); + if (self.typedValueManaged()) |tvm| { + tvm.deinit(allocator); + } allocator.destroy(self); } pub const Hash = [16]u8; + /// If the name is small enough, it is used directly as the hash. + /// If it is long, blake3 hash is computed. + pub fn hashSimpleName(name: []const u8) Hash { + var out: Hash = undefined; + if (name.len <= Hash.len) { + mem.copy(u8, &out, name); + mem.set(u8, out[name.len..], 0); + } else { + std.crypto.Blake3.hash(name, &out); + } + return out; + } + /// Must generate unique bytes with no collisions with other decls. /// The point of hashing here is only to limit the number of bytes of /// the unique identifier to a fixed size (16 bytes). pub fn fullyQualifiedNameHash(self: Decl) Hash { // Right now we only have ZIRModule as the source. So this is simply the // relative name of the decl. - var out: Hash = undefined; - std.crypto.Blake3.hash(mem.spanZ(u8, self.name), &out); - return out; + return hashSimpleName(mem.spanZ(u8, self.name)); + } + + pub fn typedValue(self: *Decl) error{AnalysisFail}!TypedValue { + const tvm = self.typedValueManaged() orelse return error.AnalysisFail; + return tvm.typed_value; + } + + pub fn value(self: *Decl) error{AnalysisFail}!Value { + return (try self.typedValue()).val; + } + + fn typedValueManaged(self: *Decl) ?*TypedValue.Managed { + switch (self.analysis) { + .initial_in_progress, + .initial_dependency_failure, + .initial_sema_failure, + => return null, + .codegen_failure, + .codegen_failure_retryable, + .repeat_dependency_failure, + .repeat_sema_failure, + .repeat_in_progress, + .complete, + => return &self.typed_value.most_recent, + } } }; @@ -325,22 +358,19 @@ pub const Module = struct { /// The value is the source instruction. queued: *text.Inst.Fn, in_progress: *Analysis, - /// There will be a corresponding ErrorMsg in Module.failed_fns + /// There will be a corresponding ErrorMsg in Module.failed_decls failure, success: Body, }, - /// The direct container of the Fn. This field will need to get more fleshed out when - /// self-hosted supports proper struct types and Zig AST => ZIR. - scope: *Scope.ZIRModule, /// This memory is temporary and points to stack memory for the duration /// of Fn analysis. pub const Analysis = struct { inner_block: Scope.Block, - /// null value means a semantic analysis error happened. - inst_table: std.AutoHashMap(*text.Inst, ?*Inst), - /// Owns the memory for instructions - arena: std.heap.ArenaAllocator, + /// TODO Performance optimization idea: instead of this inst_table, + /// use a field in the text.Inst instead to track corresponding instructions + inst_table: std.AutoHashMap(*text.Inst, *Inst), + needed_inst_capacity: usize, }; }; @@ -374,6 +404,16 @@ pub const Module = struct { } } + /// Asserts the scope has a parent which is a ZIRModule and + /// returns it. 
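+ /// Block and DeclAnalysis scopes resolve to the ZIRModule of their
+ /// owning Decl.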
+ pub fn namespace(self: *Scope) *ZIRModule { + switch (self.tag) { + .block => return self.cast(Block).?.decl.scope, + .decl => return self.cast(DeclAnalysis).?.decl.scope, + .zir_module => return self.cast(ZIRModule).?, + } + } + pub const Tag = enum { zir_module, block, @@ -407,11 +447,11 @@ pub const Module = struct { .unloaded_parse_failure, => {}, .loaded_success => { - allocator.free(contents.source); + allocator.free(self.source.bytes); self.contents.module.deinit(allocator); }, .loaded_parse_failure => { - allocator.free(contents.source); + allocator.free(self.source.bytes); }, } self.* = undefined; @@ -469,8 +509,8 @@ pub const Module = struct { ) !void { const loc = std.zig.findLineColumn(source, simple_err_msg.byte_offset); try errors.append(.{ - .src_path = try mem.dupe(u8, &arena.allocator, sub_file_path), - .msg = try mem.dupe(u8, &arena.allocator, simple_err_msg.msg), + .src_path = try arena.allocator.dupe(u8, sub_file_path), + .msg = try arena.allocator.dupe(u8, simple_err_msg.msg), .byte_offset = simple_err_msg.byte_offset, .line = loc.line, .column = loc.column, @@ -480,7 +520,7 @@ pub const Module = struct { pub fn deinit(self: *Module) void { const allocator = self.allocator; - allocator.free(self.errors); + self.work_stack.deinit(allocator); { var it = self.decl_table.iterator(); while (it.next()) |kv| { @@ -488,8 +528,44 @@ pub const Module = struct { } self.decl_table.deinit(); } + { + var it = self.failed_decls.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_decls.deinit(); + } + { + var it = self.failed_files.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_files.deinit(); + } + { + var it = self.failed_exports.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_exports.deinit(); + } + self.decl_exports.deinit(); + { + var it = self.export_owners.iterator(); + while (it.next()) |kv| { + const export_list = kv.value; + for (export_list) |exp| { + allocator.destroy(exp); + } + allocator.free(export_list); + } + self.failed_exports.deinit(); + } self.root_pkg.destroy(); - self.root_scope.deinit(); + { + self.root_scope.deinit(allocator); + allocator.destroy(self.root_scope); + } self.* = undefined; } @@ -504,19 +580,20 @@ pub const Module = struct { // Analyze the root source file now. 
self.analyzeRoot(self.root_scope) catch |err| switch (err) { error.AnalysisFail => { - assert(self.failed_files.size != 0); + assert(self.totalErrorCount() != 0); }, else => |e| return e, }; + try self.performAllTheWork(); + try self.bin_file.flush(); self.link_error_flags = self.bin_file.error_flags; } pub fn totalErrorCount(self: *Module) usize { return self.failed_decls.size + - self.failed_fns.size + - self.failed_decls.size + + self.failed_files.size + self.failed_exports.size + @boolToInt(self.link_error_flags.no_entry_point_found); } @@ -533,17 +610,8 @@ pub const Module = struct { while (it.next()) |kv| { const scope = kv.key; const err_msg = kv.value; - const source = scope.parse_failure.source; - AllErrors.add(&arena, &errors, scope.sub_file_path, source, err_msg); - } - } - { - var it = self.failed_fns.iterator(); - while (it.next()) |kv| { - const func = kv.key; - const err_msg = kv.value; - const source = func.scope.success.source; - AllErrors.add(&arena, &errors, func.scope.sub_file_path, source, err_msg); + const source = scope.source.bytes; + try AllErrors.add(&arena, &errors, scope.sub_file_path, source, err_msg.*); } } { @@ -551,8 +619,8 @@ pub const Module = struct { while (it.next()) |kv| { const decl = kv.key; const err_msg = kv.value; - const source = decl.scope.success.source; - AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg); + const source = decl.scope.source.bytes; + try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); } } { @@ -560,14 +628,14 @@ pub const Module = struct { while (it.next()) |kv| { const decl = kv.key.owner_decl; const err_msg = kv.value; - const source = decl.scope.success.source; - try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg); + const source = decl.scope.source.bytes; + try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); } } if (self.link_error_flags.no_entry_point_found) { try errors.append(.{ - .src_path = self.module.root_src_path, + .src_path = self.root_pkg.root_src_path, .line = 0, .column = 0, .byte_offset = 0, @@ -579,12 +647,56 @@ pub const Module = struct { return AllErrors{ .arena = arena.state, - .list = try mem.dupe(&arena.allocator, AllErrors.Message, errors.items), + .list = try arena.allocator.dupe(AllErrors.Message, errors.items), }; } const InnerError = error{ OutOfMemory, AnalysisFail }; + pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void { + while (self.work_stack.popOrNull()) |work_item| switch (work_item) { + .codegen_decl => |decl| switch (decl.analysis) { + .initial_in_progress, + .repeat_in_progress, + => unreachable, + + .initial_sema_failure, + .repeat_sema_failure, + .codegen_failure, + .initial_dependency_failure, + .repeat_dependency_failure, + => continue, + + .complete, .codegen_failure_retryable => { + if (decl.typed_value.most_recent.typed_value.val.cast(Value.Payload.Function)) |payload| { + switch (payload.func.analysis) { + .queued => self.analyzeFnBody(decl, payload.func) catch |err| switch (err) { + error.AnalysisFail => continue, + else => |e| return e, + }, + .in_progress => unreachable, + .failure => continue, + .success => {}, + } + } + self.bin_file.updateDecl(self, decl) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + self.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create( + self.allocator, + decl.src, + "unable to codegen: {}", + .{@errorName(err)}, 
+ )); + decl.analysis = .codegen_failure_retryable; + }, + }; + }, + }, + }; + } + fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void { // TODO use the cache to identify, from the modified source files, the decls which have // changed based on the span of memory that represents the decl in the re-parsed source file. @@ -650,56 +762,39 @@ pub const Module = struct { try analyzeExport(self, &root_scope.base, export_inst); } } - - while (self.work_stack.pop()) |work_item| switch (work_item) { - .codegen_decl => |decl| switch (decl.analysis) { - .success => { - if (decl.typed_value.most_recent.typed_value.val.cast(Value.Function)) |payload| { - switch (payload.func.analysis) { - .queued => self.analyzeFnBody(decl, payload.func) catch |err| switch (err) { - error.AnalysisFail => { - assert(func_payload.func.analysis == .failure); - continue; - }, - else => |e| return e, - }, - .in_progress => unreachable, - .failure => continue, - .success => {}, - } - } - try self.bin_file.updateDecl(self, decl); - }, - }, - }; } fn analyzeFnBody(self: *Module, decl: *Decl, func: *Fn) !void { // Use the Decl's arena for function memory. var arena = decl.typed_value.most_recent.arena.?.promote(self.allocator); defer decl.typed_value.most_recent.arena.?.* = arena.state; - var analysis: Analysis = .{ + var analysis: Fn.Analysis = .{ .inner_block = .{ .func = func, .decl = decl, .instructions = .{}, .arena = &arena.allocator, }, - .inst_table = std.AutoHashMap(*text.Inst, ?*Inst).init(self.allocator), + .needed_inst_capacity = 0, + .inst_table = std.AutoHashMap(*text.Inst, *Inst).init(self.allocator), }; - defer analysis.inner_block.instructions.deinit(); + defer analysis.inner_block.instructions.deinit(self.allocator); defer analysis.inst_table.deinit(); const fn_inst = func.analysis.queued; func.analysis = .{ .in_progress = &analysis }; - try self.analyzeBody(&analysis.inner_block, fn_inst.positionals.body); + try self.analyzeBody(&analysis.inner_block.base, fn_inst.positionals.body); - func.analysis = .{ .success = .{ .instructions = analysis.inner_block.instructions.toOwnedSlice() } }; + func.analysis = .{ + .success = .{ + .instructions = try arena.allocator.dupe(*Inst, analysis.inner_block.instructions.items), + }, + }; } fn resolveDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl { - const hash = old_inst.fullyQualifiedNameHash(); + const hash = Decl.hashSimpleName(old_inst.name); if (self.decl_table.get(hash)) |kv| { return kv.value; } else { @@ -711,7 +806,7 @@ pub const Module = struct { errdefer self.allocator.free(name); new_decl.* = .{ .name = name, - .scope = scope.findZIRModule(), + .scope = scope.namespace(), .src = old_inst.src, .typed_value = .{ .never_succeeded = {} }, .analysis = .initial_in_progress, @@ -726,12 +821,11 @@ pub const Module = struct { }; errdefer decl_scope.arena.deinit(); - const arena_state = try self.allocator.create(std.heap.ArenaAllocator.State); - errdefer self.allocator.destroy(arena_state); + const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); const typed_value = try self.analyzeInstConst(&decl_scope.base, old_inst); - arena_state.* = decl_scope.arena; + arena_state.* = decl_scope.arena.state; new_decl.typed_value = .{ .most_recent = .{ @@ -741,7 +835,7 @@ pub const Module = struct { }; new_decl.analysis = .complete; // We ensureCapacity when scanning for decls. 
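`resolveDecl` above keys `decl_table` with `Decl.hashSimpleName`. Restated as a standalone illustration of its two branches (the `exampleKeys` function and its inputs are invented): names that fit in 16 bytes are embedded verbatim with zero padding, and longer names fall back to a Blake3 digest, so every key has the same fixed size.

```
const std = @import("std");
const mem = std.mem;

const Hash = [16]u8;

fn exampleKeys() bool {
    // Short name: stored directly with zero padding, so the key stays
    // human-readable in a debugger.
    var short: Hash = undefined;
    const name = "entry";
    mem.copy(u8, &short, name);
    mem.set(u8, short[name.len..], 0);

    // Long name: reduced to a fixed-size 16-byte Blake3 digest.
    var long: Hash = undefined;
    std.crypto.Blake3.hash("some.very.long.fully.qualified.decl.name", &long);

    // Distinct names yield distinct keys either way.
    return !mem.eql(u8, &short, &long);
}
```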
- self.work_stack.appendAssumeCapacity(self.allocator, .{ .codegen_decl = new_decl }); + self.work_stack.appendAssumeCapacity(.{ .codegen_decl = new_decl }); return new_decl; } } @@ -756,6 +850,7 @@ pub const Module = struct { .initial_sema_failure, .repeat_sema_failure, .codegen_failure, + .codegen_failure_retryable, => return error.AnalysisFail, .complete => return decl, @@ -764,14 +859,14 @@ pub const Module = struct { fn resolveInst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Inst { if (scope.cast(Scope.Block)) |block| { - if (block.func.inst_table.get(old_inst)) |kv| { - return kv.value.ptr orelse return error.AnalysisFail; + if (block.func.analysis.in_progress.inst_table.get(old_inst)) |kv| { + return kv.value; } } const decl = try self.resolveCompleteDecl(scope, old_inst); const decl_ref = try self.analyzeDeclRef(scope, old_inst.src, decl); - return self.analyzeDeref(scope, old_inst.src, decl_ref); + return self.analyzeDeref(scope, old_inst.src, decl_ref, old_inst.src); } fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { @@ -819,7 +914,7 @@ pub const Module = struct { return val.toType(); } - fn analyzeExport(self: *Module, scope: *Scope, export_inst: *text.Inst.Export) !void { + fn analyzeExport(self: *Module, scope: *Scope, export_inst: *text.Inst.Export) InnerError!void { try self.decl_exports.ensureCapacity(self.decl_exports.size + 1); try self.export_owners.ensureCapacity(self.export_owners.size + 1); const symbol_name = try self.resolveConstString(scope, export_inst.positionals.symbol_name); @@ -840,7 +935,7 @@ pub const Module = struct { const owner_decl = scope.decl(); new_export.* = .{ - .options = .{ .data = .{ .name = symbol_name } }, + .options = .{ .name = symbol_name }, .src = export_inst.base.src, .link = .{}, .owner_decl = owner_decl, @@ -865,7 +960,19 @@ pub const Module = struct { de_gop.kv.value[de_gop.kv.value.len - 1] = new_export; errdefer de_gop.kv.value = self.allocator.shrink(de_gop.kv.value, de_gop.kv.value.len - 1); - try self.bin_file.updateDeclExports(self, decl, de_gop.kv.value); + self.bin_file.updateDeclExports(self, exported_decl, de_gop.kv.value) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + try self.failed_exports.ensureCapacity(self.failed_exports.size + 1); + self.failed_exports.putAssumeCapacityNoClobber(new_export, try ErrorMsg.create( + self.allocator, + export_inst.base.src, + "unable to export: {}", + .{@errorName(err)}, + )); + new_export.status = .failed_retryable; + }, + }; } /// TODO should not need the cast on the last parameter at the callsites @@ -976,7 +1083,7 @@ pub const Module = struct { fn constIntBig(self: *Module, scope: *Scope, src: usize, ty: Type, big_int: BigIntConst) !*Inst { const val_payload = if (big_int.positive) blk: { if (big_int.to(u64)) |x| { - return self.constIntUnsigned(src, ty, x); + return self.constIntUnsigned(scope, src, ty, x); } else |err| switch (err) { error.NegativeIntoUnsigned => unreachable, error.TargetTooSmall => {}, // handled below @@ -986,7 +1093,7 @@ pub const Module = struct { break :blk &big_int_payload.base; } else blk: { if (big_int.to(i64)) |x| { - return self.constIntSigned(src, ty, x); + return self.constIntSigned(scope, src, ty, x); } else |err| switch (err) { error.NegativeIntoUnsigned => unreachable, error.TargetTooSmall => {}, // handled below @@ -1014,15 +1121,17 @@ pub const Module = struct { switch (old_inst.tag) { .breakpoint => return self.analyzeInstBreakpoint(scope, 
old_inst.cast(text.Inst.Breakpoint).?),
            .call => return self.analyzeInstCall(scope, old_inst.cast(text.Inst.Call).?),
+            .declref => return self.analyzeInstDeclRef(scope, old_inst.cast(text.Inst.DeclRef).?),
            .str => {
-                // We can use this reference because Inst.Const's Value is arena-allocated.
-                // The value would get copied to a MemoryCell before the `text.Inst.Str` lifetime ends.
                const bytes = old_inst.cast(text.Inst.Str).?.positionals.bytes;
-                return self.constStr(old_inst.src, bytes);
+                // The bytes references memory inside the ZIR text module, which can get deallocated
+                // after semantic analysis is complete. We need the memory to be in the Decl's arena.
+                const arena_bytes = try scope.arena().dupe(u8, bytes);
+                return self.constStr(scope, old_inst.src, arena_bytes);
            },
            .int => {
                const big_int = old_inst.cast(text.Inst.Int).?.positionals.int;
-                return self.constIntBig(old_inst.src, Type.initTag(.comptime_int), big_int);
+                return self.constIntBig(scope, old_inst.src, Type.initTag(.comptime_int), big_int);
            },
            .ptrtoint => return self.analyzeInstPtrToInt(scope, old_inst.cast(text.Inst.PtrToInt).?),
            .fieldptr => return self.analyzeInstFieldPtr(scope, old_inst.cast(text.Inst.FieldPtr).?),
@@ -1036,7 +1145,7 @@ pub const Module = struct {
                try self.analyzeExport(scope, old_inst.cast(text.Inst.Export).?);
                return self.constVoid(scope, old_inst.src);
            },
-            .primitive => return self.analyzeInstPrimitive(old_inst.cast(text.Inst.Primitive).?),
+            .primitive => return self.analyzeInstPrimitive(scope, old_inst.cast(text.Inst.Primitive).?),
            .fntype => return self.analyzeInstFnType(scope, old_inst.cast(text.Inst.FnType).?),
            .intcast => return self.analyzeInstIntCast(scope, old_inst.cast(text.Inst.IntCast).?),
            .bitcast => return self.analyzeInstBitCast(scope, old_inst.cast(text.Inst.BitCast).?),
@@ -1054,6 +1163,14 @@ pub const Module = struct {
        return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, Inst.Args(Inst.Breakpoint){});
    }

+    fn analyzeInstDeclRef(self: *Module, scope: *Scope, inst: *text.Inst.DeclRef) InnerError!*Inst {
+        return self.fail(scope, inst.base.src, "TODO implement analyzeInstDeclRef", .{});
+    }
+
+    fn analyzeDeclRef(self: *Module, scope: *Scope, src: usize, decl: *Decl) InnerError!*Inst {
+        return self.fail(scope, src, "TODO implement analyzeDeclRef", .{});
+    }
+
    fn analyzeInstCall(self: *Module, scope: *Scope, inst: *text.Inst.Call) InnerError!*Inst {
        const func = try self.resolveInst(scope, inst.positionals.func);
        if (func.ty.zigTypeTag() != .Fn)
@@ -1123,8 +1240,7 @@ pub const Module = struct {
        const new_func = try scope.arena().create(Fn);
        new_func.* = .{
            .fn_type = fn_type,
-            .analysis = .{ .queued = fn_inst.positionals.body },
-            .scope = scope.namespace(),
+            .analysis = .{ .queued = fn_inst },
        };
        const fn_payload = try scope.arena().create(Value.Payload.Function);
        fn_payload.* = .{ .func = new_func };
@@ -1141,28 +1257,28 @@ pub const Module = struct {
            fntype.positionals.param_types.len == 0 and
            fntype.kw_args.cc == .Unspecified)
        {
-            return self.constType(fntype.base.src, Type.initTag(.fn_noreturn_no_args));
+            return self.constType(scope, fntype.base.src, Type.initTag(.fn_noreturn_no_args));
        }

        if (return_type.zigTypeTag() == .NoReturn and
            fntype.positionals.param_types.len == 0 and
            fntype.kw_args.cc == .Naked)
        {
-            return self.constType(fntype.base.src, Type.initTag(.fn_naked_noreturn_no_args));
+            return self.constType(scope, fntype.base.src, Type.initTag(.fn_naked_noreturn_no_args));
        }

        if (return_type.zigTypeTag() == .Void and
fntype.positionals.param_types.len == 0 and fntype.kw_args.cc == .C) { - return self.constType(fntype.base.src, Type.initTag(.fn_ccc_void_no_args)); + return self.constType(scope, fntype.base.src, Type.initTag(.fn_ccc_void_no_args)); } return self.fail(scope, fntype.base.src, "TODO implement fntype instruction more", .{}); } - fn analyzeInstPrimitive(self: *Module, primitive: *text.Inst.Primitive) InnerError!*Inst { - return self.constType(primitive.base.src, primitive.positionals.tag.toType()); + fn analyzeInstPrimitive(self: *Module, scope: *Scope, primitive: *text.Inst.Primitive) InnerError!*Inst { + return self.constType(scope, primitive.base.src, primitive.positionals.tag.toType()); } fn analyzeInstAs(self: *Module, scope: *Scope, as: *text.Inst.As) InnerError!*Inst { @@ -1332,18 +1448,22 @@ pub const Module = struct { fn analyzeInstDeref(self: *Module, scope: *Scope, deref: *text.Inst.Deref) InnerError!*Inst { const ptr = try self.resolveInst(scope, deref.positionals.ptr); + return self.analyzeDeref(scope, deref.base.src, ptr, deref.positionals.ptr.src); + } + + fn analyzeDeref(self: *Module, scope: *Scope, src: usize, ptr: *Inst, ptr_src: usize) InnerError!*Inst { const elem_ty = switch (ptr.ty.zigTypeTag()) { .Pointer => ptr.ty.elemType(), - else => return self.fail(scope, deref.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}), + else => return self.fail(scope, ptr_src, "expected pointer, found '{}'", .{ptr.ty}), }; if (ptr.value()) |val| { - return self.constInst(scope, deref.base.src, .{ + return self.constInst(scope, src, .{ .ty = elem_ty, - .val = val.pointerDeref(), + .val = try val.pointerDeref(scope.arena()), }); } - return self.fail(scope, deref.base.src, "TODO implement runtime deref", .{}); + return self.fail(scope, src, "TODO implement runtime deref", .{}); } fn analyzeInstAsm(self: *Module, scope: *Scope, assembly: *text.Inst.Asm) InnerError!*Inst { @@ -1390,7 +1510,7 @@ pub const Module = struct { const rhs_ty_tag = rhs.ty.zigTypeTag(); if (is_equality_cmp and lhs_ty_tag == .Null and rhs_ty_tag == .Null) { // null == null, null != null - return self.constBool(inst.base.src, op == .eq); + return self.constBool(scope, inst.base.src, op == .eq); } else if (is_equality_cmp and ((lhs_ty_tag == .Null and rhs_ty_tag == .Optional) or rhs_ty_tag == .Null and lhs_ty_tag == .Optional)) @@ -1399,7 +1519,7 @@ pub const Module = struct { const opt_operand = if (lhs_ty_tag == .Optional) lhs else rhs; if (opt_operand.value()) |opt_val| { const is_null = opt_val.isNull(); - return self.constBool(inst.base.src, if (op == .eq) is_null else !is_null); + return self.constBool(scope, inst.base.src, if (op == .eq) is_null else !is_null); } const b = try self.requireRuntimeBlock(scope, inst.base.src); switch (op) { @@ -1468,32 +1588,27 @@ pub const Module = struct { const parent_block = try self.requireRuntimeBlock(scope, inst.base.src); var true_block: Scope.Block = .{ - .base = .{ .parent = scope }, .func = parent_block.func, + .decl = parent_block.decl, .instructions = .{}, + .arena = parent_block.arena, }; - defer true_block.instructions.deinit(); + defer true_block.instructions.deinit(self.allocator); try self.analyzeBody(&true_block.base, inst.positionals.true_body); var false_block: Scope.Block = .{ - .base = .{ .parent = scope }, .func = parent_block.func, + .decl = parent_block.decl, .instructions = .{}, + .arena = parent_block.arena, }; - defer false_block.instructions.deinit(); + defer false_block.instructions.deinit(self.allocator); try 
self.analyzeBody(&false_block.base, inst.positionals.false_body); - // Copy the instruction pointers to the arena memory - const true_instructions = try scope.arena().alloc(*Inst, true_block.instructions.items.len); - const false_instructions = try scope.arena().alloc(*Inst, false_block.instructions.items.len); - - mem.copy(*Inst, true_instructions, true_block.instructions.items); - mem.copy(*Inst, false_instructions, false_block.instructions.items); - return self.addNewInstArgs(parent_block, inst.base.src, Type.initTag(.void), Inst.CondBr, Inst.Args(Inst.CondBr){ .condition = cond, - .true_body = .{ .instructions = true_instructions }, - .false_body = .{ .instructions = false_instructions }, + .true_body = .{ .instructions = try scope.arena().dupe(*Inst, true_block.instructions.items) }, + .false_body = .{ .instructions = try scope.arena().dupe(*Inst, false_block.instructions.items) }, }); } @@ -1521,15 +1636,18 @@ pub const Module = struct { } fn analyzeBody(self: *Module, scope: *Scope, body: text.Module.Body) !void { - for (body.instructions) |src_inst| { - const new_inst = self.analyzeInst(scope, src_inst) catch |err| { - if (scope.cast(Scope.Block)) |b| { - self.fns.items[b.func.fn_index].analysis_status = .failure; - try b.func.inst_table.putNoClobber(src_inst, .{ .ptr = null }); - } - return err; - }; - if (scope.cast(Scope.Block)) |b| try b.func.inst_table.putNoClobber(src_inst, .{ .ptr = new_inst }); + if (scope.cast(Scope.Block)) |b| { + const analysis = b.func.analysis.in_progress; + analysis.needed_inst_capacity += body.instructions.len; + try analysis.inst_table.ensureCapacity(analysis.needed_inst_capacity); + for (body.instructions) |src_inst| { + const new_inst = try self.analyzeInst(scope, src_inst); + analysis.inst_table.putAssumeCapacityNoClobber(src_inst, new_inst); + } + } else { + for (body.instructions) |src_inst| { + _ = try self.analyzeInst(scope, src_inst); + } } } @@ -1575,7 +1693,7 @@ pub const Module = struct { if (lhs.value()) |lhs_val| { if (rhs.value()) |rhs_val| { - return self.constBool(src, Value.compare(lhs_val, op, rhs_val)); + return self.constBool(scope, src, Value.compare(lhs_val, op, rhs_val)); } } @@ -1647,8 +1765,8 @@ pub const Module = struct { const zcmp = lhs_val.orderAgainstZero(); if (lhs_val.floatHasFraction()) { switch (op) { - .eq => return self.constBool(src, false), - .neq => return self.constBool(src, true), + .eq => return self.constBool(scope, src, false), + .neq => return self.constBool(scope, src, true), else => {}, } if (zcmp == .lt) { @@ -1682,8 +1800,8 @@ pub const Module = struct { const zcmp = rhs_val.orderAgainstZero(); if (rhs_val.floatHasFraction()) { switch (op) { - .eq => return self.constBool(src, false), - .neq => return self.constBool(src, true), + .eq => return self.constBool(scope, src, false), + .neq => return self.constBool(scope, src, true), else => {}, } if (zcmp == .lt) { @@ -1711,7 +1829,7 @@ pub const Module = struct { const casted_bits = std.math.cast(u16, max_bits) catch |err| switch (err) { error.Overflow => return self.fail(scope, src, "{} exceeds maximum integer bit count", .{max_bits}), }; - break :blk try self.makeIntType(dest_int_is_signed, casted_bits); + break :blk try self.makeIntType(scope, dest_int_is_signed, casted_bits); }; const casted_lhs = try self.coerce(scope, dest_type, lhs); const casted_rhs = try self.coerce(scope, dest_type, lhs); @@ -1807,7 +1925,6 @@ pub const Module = struct { fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError { 
@setCold(true); try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); - try self.failed_fns.ensureCapacity(self.failed_fns.size + 1); const err_msg = try ErrorMsg.create(self.allocator, src, format, args); switch (scope.tag) { .decl => { @@ -1820,10 +1937,11 @@ pub const Module = struct { self.failed_decls.putAssumeCapacityNoClobber(decl, err_msg); }, .block => { - const func = scope.cast(Scope.Block).?.func; - func.analysis = .failure; - self.failed_fns.putAssumeCapacityNoClobber(func, err_msg); + const block = scope.cast(Scope.Block).?; + block.func.analysis = .failure; + self.failed_decls.putAssumeCapacityNoClobber(block.decl, err_msg); }, + .zir_module => unreachable, } return error.AnalysisFail; } @@ -1868,7 +1986,7 @@ pub const ErrorMsg = struct { } pub fn deinit(self: *ErrorMsg, allocator: *Allocator) void { - allocator.free(err_msg.msg); + allocator.free(self.msg); self.* = undefined; } }; @@ -1920,7 +2038,6 @@ pub fn main() anyerror!void { .decl_exports = std.AutoHashMap(*Module.Decl, []*Module.Export).init(allocator), .export_owners = std.AutoHashMap(*Module.Decl, []*Module.Export).init(allocator), .failed_decls = std.AutoHashMap(*Module.Decl, *ErrorMsg).init(allocator), - .failed_fns = std.AutoHashMap(*Module.Fn, *ErrorMsg).init(allocator), .failed_files = std.AutoHashMap(*Module.Scope.ZIRModule, *ErrorMsg).init(allocator), .failed_exports = std.AutoHashMap(*Module.Export, *ErrorMsg).init(allocator), }; @@ -1929,8 +2046,8 @@ pub fn main() anyerror!void { try module.update(); - const errors = try module.getAllErrorsAlloc(); - defer errors.deinit(); + var errors = try module.getAllErrorsAlloc(); + defer errors.deinit(allocator); if (errors.list.len != 0) { for (errors.list) |full_err_msg| { @@ -1954,6 +2071,3 @@ pub fn main() anyerror!void { try bos.flush(); } } - -// Performance optimization ideas: -// * when analyzing use a field in the Inst instead of HashMap to track corresponding instructions diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index 74fe57bcf2..6c64c6c9de 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -8,6 +8,7 @@ const BigIntConst = std.math.big.int.Const; const BigIntMutable = std.math.big.int.Mutable; const Type = @import("../type.zig").Type; const Value = @import("../value.zig").Value; +const TypedValue = @import("../TypedValue.zig"); const ir = @import("../ir.zig"); /// These are instructions that correspond to the ZIR text format. 
See `ir.Inst` for @@ -462,6 +463,7 @@ pub const Module = struct { switch (decl.tag) { .breakpoint => return self.writeInstToStreamGeneric(stream, .breakpoint, decl, inst_table), .call => return self.writeInstToStreamGeneric(stream, .call, decl, inst_table), + .declref => return self.writeInstToStreamGeneric(stream, .declref, decl, inst_table), .str => return self.writeInstToStreamGeneric(stream, .str, decl, inst_table), .int => return self.writeInstToStreamGeneric(stream, .int, decl, inst_table), .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table), @@ -576,6 +578,7 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module .source = source, .global_name_map = &global_name_map, .decls = .{}, + .unnamed_index = 0, }; errdefer parser.arena.deinit(); @@ -601,6 +604,7 @@ const Parser = struct { decls: std.ArrayListUnmanaged(*Inst), global_name_map: *std.StringHashMap(usize), error_msg: ?ErrorMsg = null, + unnamed_index: usize, const Body = struct { instructions: std.ArrayList(*Inst), @@ -626,12 +630,12 @@ const Parser = struct { skipSpace(self); try requireEatBytes(self, "="); skipSpace(self); - const inst = try parseInstruction(self, &body_context); + const inst = try parseInstruction(self, &body_context, ident[1..]); const ident_index = body_context.instructions.items.len; if (try body_context.name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); } - try body_context.instructions.append(self.allocator, inst); + try body_context.instructions.append(inst); continue; }, ' ', '\n' => continue, @@ -712,7 +716,7 @@ const Parser = struct { skipSpace(self); try requireEatBytes(self, "="); skipSpace(self); - const inst = try parseInstruction(self, null); + const inst = try parseInstruction(self, null, ident[1..]); const ident_index = self.decls.items.len; if (try self.global_name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); @@ -781,12 +785,12 @@ const Parser = struct { return error.ParseFailure; } - fn parseInstruction(self: *Parser, body_ctx: ?*Body) InnerError!*Inst { + fn parseInstruction(self: *Parser, body_ctx: ?*Body, name: []const u8) InnerError!*Inst { const fn_name = try skipToAndOver(self, '('); inline for (@typeInfo(Inst.Tag).Enum.fields) |field| { if (mem.eql(u8, field.name, fn_name)) { const tag = @field(Inst.Tag, field.name); - return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx); + return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name); } } return self.fail("unknown instruction '{}'", .{fn_name}); @@ -797,9 +801,11 @@ const Parser = struct { comptime fn_name: []const u8, comptime InstType: type, body_ctx: ?*Body, - ) !*Inst { + inst_name: []const u8, + ) InnerError!*Inst { const inst_specific = try self.arena.allocator.create(InstType); inst_specific.base = .{ + .name = inst_name, .src = self.i, .tag = InstType.base_tag, }; @@ -885,7 +891,7 @@ const Parser = struct { var instructions = std.ArrayList(*Inst).init(&self.arena.allocator); while (true) { skipSpace(self); - try instructions.append(self.allocator, try parseParameterInst(self, body_ctx)); + try instructions.append(try parseParameterInst(self, body_ctx)); skipSpace(self); if (!eatByte(self, ',')) break; } @@ -930,13 +936,21 @@ const Parser = struct { } else { const name = try self.arena.allocator.create(Inst.Str); name.* = .{ - .base = .{ .src = src, .tag = Inst.Str.base_tag }, + .base = .{ + .name = try 
self.generateName(), + .src = src, + .tag = Inst.Str.base_tag, + }, .positionals = .{ .bytes = ident }, .kw_args = .{}, }; const declref = try self.arena.allocator.create(Inst.DeclRef); declref.* = .{ - .base = .{ .src = src, .tag = Inst.DeclRef.base_tag }, + .base = .{ + .name = try self.generateName(), + .src = src, + .tag = Inst.DeclRef.base_tag, + }, .positionals = .{ .name = &name.base }, .kw_args = .{}, }; @@ -949,25 +963,31 @@ const Parser = struct { return self.decls.items[kv.value]; } } + + fn generateName(self: *Parser) ![]u8 { + const result = try std.fmt.allocPrint(&self.arena.allocator, "unnamed${}", .{self.unnamed_index}); + self.unnamed_index += 1; + return result; + } }; pub fn emit_zir(allocator: *Allocator, old_module: ir.Module) !Module { var ctx: EmitZIR = .{ .allocator = allocator, - .decls = std.ArrayList(*Inst).init(allocator), + .decls = .{}, .decl_table = std.AutoHashMap(*ir.Inst, *Inst).init(allocator), .arena = std.heap.ArenaAllocator.init(allocator), .old_module = &old_module, }; - defer ctx.decls.deinit(); + defer ctx.decls.deinit(allocator); defer ctx.decl_table.deinit(); errdefer ctx.arena.deinit(); try ctx.emit(); return Module{ - .decls = ctx.decls.toOwnedSlice(), - .arena = ctx.arena, + .decls = ctx.decls.toOwnedSlice(allocator), + .arena = ctx.arena.state, }; } @@ -975,23 +995,32 @@ const EmitZIR = struct { allocator: *Allocator, arena: std.heap.ArenaAllocator, old_module: *const ir.Module, - decls: std.ArrayList(*Inst), + decls: std.ArrayListUnmanaged(*Inst), decl_table: std.AutoHashMap(*ir.Inst, *Inst), fn emit(self: *EmitZIR) !void { - for (self.old_module.exports) |module_export| { - const export_value = try self.emitTypedValue(module_export.src, module_export.typed_value); - const symbol_name = try self.emitStringLiteral(module_export.src, module_export.name); - const export_inst = try self.arena.allocator.create(Inst.Export); - export_inst.* = .{ - .base = .{ .src = module_export.src, .tag = Inst.Export.base_tag }, - .positionals = .{ - .symbol_name = symbol_name, - .value = export_value, - }, - .kw_args = .{}, - }; - try self.decls.append(self.allocator, &export_inst.base); + var it = self.old_module.decl_exports.iterator(); + while (it.next()) |kv| { + const decl = kv.key; + const exports = kv.value; + const export_value = try self.emitTypedValue(decl.src, decl.typed_value.most_recent.typed_value); + for (exports) |module_export| { + const symbol_name = try self.emitStringLiteral(module_export.src, module_export.options.name); + const export_inst = try self.arena.allocator.create(Inst.Export); + export_inst.* = .{ + .base = .{ + .name = try self.autoName(), + .src = module_export.src, + .tag = Inst.Export.base_tag, + }, + .positionals = .{ + .symbol_name = symbol_name, + .value = export_value, + }, + .kw_args = .{}, + }; + try self.decls.append(self.allocator, &export_inst.base); + } } } @@ -1012,7 +1041,11 @@ const EmitZIR = struct { const big_int_space = try self.arena.allocator.create(Value.BigIntSpace); const int_inst = try self.arena.allocator.create(Inst.Int); int_inst.* = .{ - .base = .{ .src = src, .tag = Inst.Int.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Int.base_tag, + }, .positionals = .{ .int = val.toBigInt(big_int_space), }, @@ -1022,7 +1055,7 @@ const EmitZIR = struct { return &int_inst.base; } - fn emitTypedValue(self: *EmitZIR, src: usize, typed_value: ir.TypedValue) Allocator.Error!*Inst { + fn emitTypedValue(self: *EmitZIR, src: usize, typed_value: TypedValue) Allocator.Error!*Inst { 
switch (typed_value.ty.zigTypeTag()) { .Pointer => { const ptr_elem_type = typed_value.ty.elemType(); @@ -1044,7 +1077,11 @@ const EmitZIR = struct { .Int => { const as_inst = try self.arena.allocator.create(Inst.As); as_inst.* = .{ - .base = .{ .src = src, .tag = Inst.As.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.As.base_tag, + }, .positionals = .{ .dest_type = try self.emitType(src, typed_value.ty), .value = try self.emitComptimeIntVal(src, typed_value.val), @@ -1060,8 +1097,7 @@ const EmitZIR = struct { return self.emitType(src, ty); }, .Fn => { - const index = typed_value.val.cast(Value.Payload.Function).?.index; - const module_fn = self.old_module.fns[index]; + const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; var inst_table = std.AutoHashMap(*ir.Inst, *Inst).init(self.allocator); defer inst_table.deinit(); @@ -1069,7 +1105,7 @@ const EmitZIR = struct { var instructions = std.ArrayList(*Inst).init(self.allocator); defer instructions.deinit(); - try self.emitBody(module_fn.body, &inst_table, &instructions); + try self.emitBody(module_fn.analysis.success, &inst_table, &instructions); const fn_type = try self.emitType(src, module_fn.fn_type); @@ -1078,7 +1114,11 @@ const EmitZIR = struct { const fn_inst = try self.arena.allocator.create(Inst.Fn); fn_inst.* = .{ - .base = .{ .src = src, .tag = Inst.Fn.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Fn.base_tag, + }, .positionals = .{ .fn_type = fn_type, .body = .{ .instructions = arena_instrs }, @@ -1095,7 +1135,11 @@ const EmitZIR = struct { fn emitTrivial(self: *EmitZIR, src: usize, comptime T: type) Allocator.Error!*Inst { const new_inst = try self.arena.allocator.create(T); new_inst.* = .{ - .base = .{ .src = src, .tag = T.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = T.base_tag, + }, .positionals = .{}, .kw_args = .{}, }; @@ -1120,7 +1164,11 @@ const EmitZIR = struct { elem.* = try self.resolveInst(inst_table, old_inst.args.args[i]); } new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.Call.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.Call.base_tag, + }, .positionals = .{ .func = try self.resolveInst(inst_table, old_inst.args.func), .args = args, @@ -1152,7 +1200,11 @@ const EmitZIR = struct { } new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.Asm.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.Asm.base_tag, + }, .positionals = .{ .asm_source = try self.emitStringLiteral(inst.src, old_inst.args.asm_source), .return_type = try self.emitType(inst.src, inst.ty), @@ -1174,7 +1226,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.PtrToInt).?; const new_inst = try self.arena.allocator.create(Inst.PtrToInt); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.PtrToInt.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.PtrToInt.base_tag, + }, .positionals = .{ .ptr = try self.resolveInst(inst_table, old_inst.args.ptr), }, @@ -1186,7 +1242,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.BitCast).?; const new_inst = try self.arena.allocator.create(Inst.BitCast); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.BitCast.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.BitCast.base_tag, + }, .positionals = .{ .dest_type = try self.emitType(inst.src, inst.ty), .operand = try 
self.resolveInst(inst_table, old_inst.args.operand), @@ -1199,7 +1259,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.Cmp).?; const new_inst = try self.arena.allocator.create(Inst.Cmp); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.Cmp.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.Cmp.base_tag, + }, .positionals = .{ .lhs = try self.resolveInst(inst_table, old_inst.args.lhs), .rhs = try self.resolveInst(inst_table, old_inst.args.rhs), @@ -1223,7 +1287,11 @@ const EmitZIR = struct { const new_inst = try self.arena.allocator.create(Inst.CondBr); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.CondBr.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.CondBr.base_tag, + }, .positionals = .{ .condition = try self.resolveInst(inst_table, old_inst.args.condition), .true_body = .{ .instructions = true_body.toOwnedSlice() }, @@ -1237,7 +1305,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.IsNull).?; const new_inst = try self.arena.allocator.create(Inst.IsNull); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.IsNull.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.IsNull.base_tag, + }, .positionals = .{ .operand = try self.resolveInst(inst_table, old_inst.args.operand), }, @@ -1249,7 +1321,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.IsNonNull).?; const new_inst = try self.arena.allocator.create(Inst.IsNonNull); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.IsNonNull.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.IsNonNull.base_tag, + }, .positionals = .{ .operand = try self.resolveInst(inst_table, old_inst.args.operand), }, @@ -1258,7 +1334,7 @@ const EmitZIR = struct { break :blk &new_inst.base; }, }; - try instructions.append(self.allocator, new_inst); + try instructions.append(new_inst); try inst_table.putNoClobber(inst, new_inst); } } @@ -1301,7 +1377,11 @@ const EmitZIR = struct { const fntype_inst = try self.arena.allocator.create(Inst.FnType); fntype_inst.* = .{ - .base = .{ .src = src, .tag = Inst.FnType.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.FnType.base_tag, + }, .positionals = .{ .param_types = emitted_params, .return_type = try self.emitType(src, ty.fnReturnType()), @@ -1318,10 +1398,18 @@ const EmitZIR = struct { } } + fn autoName(self: *EmitZIR) ![]u8 { + return std.fmt.allocPrint(&self.arena.allocator, "{}", .{self.decls.items.len}); + } + fn emitPrimitiveType(self: *EmitZIR, src: usize, tag: Inst.Primitive.BuiltinType) !*Inst { const primitive_inst = try self.arena.allocator.create(Inst.Primitive); primitive_inst.* = .{ - .base = .{ .src = src, .tag = Inst.Primitive.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Primitive.base_tag, + }, .positionals = .{ .tag = tag, }, @@ -1334,7 +1422,11 @@ const EmitZIR = struct { fn emitStringLiteral(self: *EmitZIR, src: usize, str: []const u8) !*Inst { const str_inst = try self.arena.allocator.create(Inst.Str); str_inst.* = .{ - .base = .{ .src = src, .tag = Inst.Str.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Str.base_tag, + }, .positionals = .{ .bytes = str, }, diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index 865ff609b1..641b2a504d 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -153,7 +153,7 @@ pub const ElfFile = 
struct { }; pub const Export = struct { - sym_index: usize, + sym_index: ?usize = null, }; pub fn deinit(self: *ElfFile) void { @@ -249,6 +249,11 @@ pub const ElfFile = struct { return @intCast(u32, result); } + fn getString(self: *ElfFile, str_off: u32) []const u8 { + assert(str_off < self.shstrtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.shstrtab.items.ptr + str_off)); + } + fn updateString(self: *ElfFile, old_str_off: u32, new_name: []const u8) !u32 { const existing_name = self.getString(old_str_off); if (mem.eql(u8, existing_name, new_name)) { @@ -418,6 +423,14 @@ pub const ElfFile = struct { const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); if (self.phdr_table_dirty) { + const phsize: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Phdr), + .p64 => @sizeOf(elf.Elf64_Phdr), + }; + const phalign: u16 = switch (self.ptr_width) { + .p32 => @alignOf(elf.Elf32_Phdr), + .p64 => @alignOf(elf.Elf64_Phdr), + }; const allocated_size = self.allocatedSize(self.phdr_table_offset.?); const needed_size = self.program_headers.items.len * phsize; @@ -426,11 +439,10 @@ pub const ElfFile = struct { self.phdr_table_offset = self.findFreeSpace(needed_size, phalign); } - const allocator = self.program_headers.allocator; switch (self.ptr_width) { .p32 => { - const buf = try allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len); + defer self.allocator.free(buf); for (buf) |*phdr, i| { phdr.* = progHeaderTo32(self.program_headers.items[i]); @@ -441,8 +453,8 @@ pub const ElfFile = struct { try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); }, .p64 => { - const buf = try allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len); + defer self.allocator.free(buf); for (buf) |*phdr, i| { phdr.* = self.program_headers.items[i]; @@ -478,12 +490,20 @@ pub const ElfFile = struct { } } if (self.shdr_table_dirty) { + const shsize: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Shdr), + .p64 => @sizeOf(elf.Elf64_Shdr), + }; + const shalign: u16 = switch (self.ptr_width) { + .p32 => @alignOf(elf.Elf32_Shdr), + .p64 => @alignOf(elf.Elf64_Shdr), + }; const allocated_size = self.allocatedSize(self.shdr_table_offset.?); - const needed_size = self.sections.items.len * phsize; + const needed_size = self.sections.items.len * shsize; if (needed_size > allocated_size) { self.shdr_table_offset = null; // free the space - self.shdr_table_offset = self.findFreeSpace(needed_size, phalign); + self.shdr_table_offset = self.findFreeSpace(needed_size, shalign); } switch (self.ptr_width) { @@ -719,7 +739,7 @@ pub const ElfFile = struct { defer code.deinit(); const typed_value = decl.typed_value.most_recent.typed_value; - const err_msg = try codegen.generateSymbol(typed_value, module, &code); + const err_msg = try codegen.generateSymbol(typed_value, module.*, &code); if (err_msg != null) |em| { decl.analysis = .codegen_failure; _ = try module.failed_decls.put(decl, em); @@ -751,15 +771,15 @@ pub const ElfFile = struct { try self.writeSymbol(decl.link.local_sym_index); break :blk file_offset; } else { - try self.symbols.ensureCapacity(self.symbols.items.len + 1); - try self.offset_table.ensureCapacity(self.offset_table.items.len + 1); + try 
self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + 1); + try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); const decl_name = mem.spanZ(u8, decl.name); const name_str_index = try self.makeString(decl_name); const new_block = try self.allocateTextBlock(code_size); const local_sym_index = self.symbols.items.len; const offset_table_index = self.offset_table.items.len; - self.symbols.appendAssumeCapacity(self.allocator, .{ + self.symbols.appendAssumeCapacity(.{ .st_name = name_str_index, .st_info = (elf.STB_LOCAL << 4) | stt_bits, .st_other = 0, @@ -767,9 +787,9 @@ pub const ElfFile = struct { .st_value = new_block.vaddr, .st_size = code_size, }); - errdefer self.symbols.shrink(self.symbols.items.len - 1); - self.offset_table.appendAssumeCapacity(self.allocator, new_block.vaddr); - errdefer self.offset_table.shrink(self.offset_table.items.len - 1); + errdefer self.symbols.shrink(self.allocator, self.symbols.items.len - 1); + self.offset_table.appendAssumeCapacity(new_block.vaddr); + errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); try self.writeSymbol(local_sym_index); try self.writeOffsetTableEntry(offset_table_index); @@ -796,11 +816,12 @@ pub const ElfFile = struct { self: *ElfFile, module: *ir.Module, decl: *const ir.Module.Decl, - exports: []const *const Export, + exports: []const *ir.Module.Export, ) !void { - try self.symbols.ensureCapacity(self.symbols.items.len + exports.len); + try self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + exports.len); const typed_value = decl.typed_value.most_recent.typed_value; - const decl_sym = self.symbols.items[decl.link.local_sym_index.?]; + assert(decl.link.local_sym_index != 0); + const decl_sym = self.symbols.items[decl.link.local_sym_index]; for (exports) |exp| { if (exp.options.section) |section_name| { @@ -808,15 +829,16 @@ pub const ElfFile = struct { try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); module.failed_exports.putAssumeCapacityNoClobber( exp, - try ir.ErrorMsg.create(0, "Unimplemented: ExportOptions.section", .{}), + try ir.ErrorMsg.create(self.allocator, 0, "Unimplemented: ExportOptions.section", .{}), ); + continue; } } - const stb_bits = switch (exp.options.linkage) { + const stb_bits: u8 = switch (exp.options.linkage) { .Internal => elf.STB_LOCAL, .Strong => blk: { if (mem.eql(u8, exp.options.name, "_start")) { - self.entry_addr = decl_symbol.vaddr; + self.entry_addr = decl_sym.st_value; } break :blk elf.STB_GLOBAL; }, @@ -825,8 +847,9 @@ pub const ElfFile = struct { try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); module.failed_exports.putAssumeCapacityNoClobber( exp, - try ir.ErrorMsg.create(0, "Unimplemented: GlobalLinkage.LinkOnce", .{}), + try ir.ErrorMsg.create(self.allocator, 0, "Unimplemented: GlobalLinkage.LinkOnce", .{}), ); + continue; }, }; const stt_bits: u8 = @truncate(u4, decl_sym.st_info); @@ -844,15 +867,15 @@ pub const ElfFile = struct { } else { const name = try self.makeString(exp.options.name); const i = self.symbols.items.len; - self.symbols.appendAssumeCapacity(self.allocator, .{ - .st_name = sn.name, + self.symbols.appendAssumeCapacity(.{ + .st_name = name, .st_info = (stb_bits << 4) | stt_bits, .st_other = 0, .st_shndx = self.text_section_index.?, .st_value = decl_sym.st_value, .st_size = decl_sym.st_size, }); - errdefer self.symbols.shrink(self.symbols.items.len - 1); + errdefer self.symbols.shrink(self.allocator, self.symbols.items.len - 1); try 
self.writeSymbol(i); self.symbol_count_dirty = true; @@ -946,10 +969,15 @@ pub const ElfFile = struct { } fn writeSymbol(self: *ElfFile, index: usize) !void { + assert(index != 0); const syms_sect = &self.sections.items[self.symtab_section_index.?]; // Make sure we are not pointlessly writing symbol data that will have to get relocated // due to running out of space. if (self.symbol_count_dirty) { + const sym_size: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Sym), + .p64 => @sizeOf(elf.Elf64_Sym), + }; const allocated_size = self.allocatedSize(syms_sect.sh_offset); const needed_size = self.symbols.items.len * sym_size; if (needed_size > allocated_size) { @@ -990,11 +1018,15 @@ pub const ElfFile = struct { } fn writeAllSymbols(self: *ElfFile) !void { - const small_ptr = self.ptr_width == .p32; const syms_sect = &self.sections.items[self.symtab_section_index.?]; - const sym_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); - const sym_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); - + const sym_align: u16 = switch (self.ptr_width) { + .p32 => @alignOf(elf.Elf32_Sym), + .p64 => @alignOf(elf.Elf64_Sym), + }; + const sym_size: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Sym), + .p64 => @sizeOf(elf.Elf64_Sym), + }; const allocated_size = self.allocatedSize(syms_sect.sh_offset); const needed_size = self.symbols.items.len * sym_size; if (needed_size > allocated_size) { diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 387df6fe1b..95d25770dd 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -67,6 +67,7 @@ pub const Value = extern union { int_big_positive, int_big_negative, function, + ref_val, decl_ref, elem_ptr, bytes, @@ -158,6 +159,11 @@ pub const Value = extern union { .int_big_positive => return out_stream.print("{}", .{val.cast(Payload.IntBigPositive).?.asBigInt()}), .int_big_negative => return out_stream.print("{}", .{val.cast(Payload.IntBigNegative).?.asBigInt()}), .function => return out_stream.writeAll("(function)"), + .ref_val => { + const ref_val = val.cast(Payload.RefVal).?; + try out_stream.writeAll("&const "); + val = ref_val.val; + }, .decl_ref => return out_stream.writeAll("(decl ref)"), .elem_ptr => { const elem_ptr = val.cast(Payload.ElemPtr).?; @@ -229,6 +235,7 @@ pub const Value = extern union { .int_big_positive, .int_big_negative, .function, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -276,6 +283,7 @@ pub const Value = extern union { .bool_false, .null_value, .function, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -333,6 +341,7 @@ pub const Value = extern union { .bool_false, .null_value, .function, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -391,6 +400,7 @@ pub const Value = extern union { .bool_false, .null_value, .function, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -454,6 +464,7 @@ pub const Value = extern union { .bool_false, .null_value, .function, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -546,6 +557,7 @@ pub const Value = extern union { .bool_false, .null_value, .function, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -600,6 +612,7 @@ pub const Value = extern union { .bool_false, .null_value, .function, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -655,7 +668,8 @@ pub const Value = extern union { } /// Asserts the value is a pointer and dereferences it. - pub fn pointerDeref(self: Value, module: *ir.Module) !Value { + /// Returns error.AnalysisFail if the pointer points to a Decl that failed semantic analysis. 
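The reworked `pointerDeref` below distinguishes three pointer-shaped payloads. A toy model of the dereference rules (only the payload names `ref_val`, `decl_ref`, and `elem_ptr` come from this patch; the `Val` and `Decl` types here are drastically simplified):

```
const Decl = struct { value: Val };

const Val = union(enum) {
    int: i64,
    array: []const Val,
    ref_val: *const Val,
    decl_ref: *const Decl,
    elem_ptr: struct { array_ptr: *const Val, index: usize },

    fn deref(self: Val) Val {
        return switch (self) {
            // A ref_val unwraps to the value it wraps.
            .ref_val => |target| target.*,
            // A decl_ref yields the referenced Decl's analyzed value.
            .decl_ref => |decl| decl.value,
            // An elem_ptr derefs its array pointer, then indexes the result.
            .elem_ptr => |ep| ep.array_ptr.deref().array[ep.index],
            else => unreachable, // not a pointer value
        };
    }
};
```

The real implementation additionally threads an allocator through the recursive `elem_ptr` case and surfaces `error.AnalysisFail` when the referenced Decl never produced a value; the toy skips both.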
+ pub fn pointerDeref(self: Value, allocator: *Allocator) error{ AnalysisFail, OutOfMemory }!Value { return switch (self.tag()) { .ty, .u8_type, @@ -704,21 +718,19 @@ pub const Value = extern union { => unreachable, .the_one_possible_value => Value.initTag(.the_one_possible_value), - .decl_ref => { - const index = self.cast(Payload.DeclRef).?.index; - return module.getDeclValue(index); - }, + .ref_val => self.cast(Payload.RefVal).?.val, + .decl_ref => self.cast(Payload.DeclRef).?.decl.value(), .elem_ptr => { - const elem_ptr = self.cast(ElemPtr).?; - const array_val = try elem_ptr.array_ptr.pointerDeref(module); - return self.elemValue(array_val, elem_ptr.index); + const elem_ptr = self.cast(Payload.ElemPtr).?; + const array_val = try elem_ptr.array_ptr.pointerDeref(allocator); + return array_val.elemValue(allocator, elem_ptr.index); }, }; } /// Asserts the value is a single-item pointer to an array, or an array, /// or an unknown-length pointer, and returns the element value at the index. - pub fn elemValue(self: Value, index: usize) Value { + pub fn elemValue(self: Value, allocator: *Allocator, index: usize) error{OutOfMemory}!Value { switch (self.tag()) { .ty, .u8_type, @@ -764,6 +776,7 @@ pub const Value = extern union { .int_big_negative, .undef, .elem_ptr, + .ref_val, .decl_ref, => unreachable, @@ -838,6 +851,7 @@ pub const Value = extern union { .int_i64, .int_big_positive, .int_big_negative, + .ref_val, .decl_ref, .elem_ptr, .bytes, @@ -896,11 +910,16 @@ pub const Value = extern union { elem_type: *Type, }; + /// Represents a pointer to another immutable value. + pub const RefVal = struct { + base: Payload = Payload{ .tag = .ref_val }, + val: Value, + }; + /// Represents a pointer to a decl, not the value of the decl. pub const DeclRef = struct { base: Payload = Payload{ .tag = .decl_ref }, - /// Index into the Module's decls list - index: usize, + decl: *ir.Module.Decl, }; pub const ElemPtr = struct { From 6a2425c38c2a776d2aafd68b25da8c4c1164f614 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 13 May 2020 22:12:38 -0400 Subject: [PATCH 08/31] self-hosted: fix the rest of the compile errors --- lib/std/fs/file.zig | 11 ++++++--- src-self-hosted/codegen.zig | 47 ++++++++++++++++++----------------- src-self-hosted/link.zig | 49 ++++++++++++++++++++----------------- 3 files changed, 57 insertions(+), 50 deletions(-) diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index a33d0d8e3e..9d72733395 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -527,19 +527,21 @@ pub const File = struct { } } - pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) PWriteError!usize { + pub const CopyRangeError = PWriteError || PReadError; + + pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) CopyRangeError!usize { // TODO take advantage of copy_file_range OS APIs var buf: [8 * 4096]u8 = undefined; const adjusted_count = math.min(buf.len, len); const amt_read = try in.pread(buf[0..adjusted_count], in_offset); - if (amt_read == 0) return 0; + if (amt_read == 0) return @as(usize, 0); return out.pwrite(buf[0..amt_read], out_offset); } /// Returns the number of bytes copied. If the number read is smaller than `buffer.len`, it /// means the in file reached the end. Reaching the end of a file is not an error condition. 
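A hedged usage sketch for `copyRangeAll` as it ends up below (`cloneRegion` and its error are invented; the signature is the one in this patch):

```
const std = @import("std");

fn cloneRegion(in: std.fs.File, out: std.fs.File, len: usize) !void {
    // Copy `len` bytes from the start of `in` to the start of `out`,
    // looping internally until done; a short copy means `in` hit EOF early.
    const copied = try in.copyRangeAll(0, out, 0, len);
    if (copied != len) return error.SourceTooShort;
}
```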
- pub fn copyRangeAll(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) PWriteError!usize { - var total_bytes_copied = 0; + pub fn copyRangeAll(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) CopyRangeError!usize { + var total_bytes_copied: usize = 0; var in_off = in_offset; var out_off = out_offset; while (total_bytes_copied < len) { @@ -549,6 +551,7 @@ pub const File = struct { in_off += amt_copied; out_off += amt_copied; } + return total_bytes_copied; } pub const WriteFileOptions = struct { diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 034ff9ffaa..09f722c1f1 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -5,16 +5,17 @@ const ir = @import("ir.zig"); const Type = @import("type.zig").Type; const Value = @import("value.zig").Value; const TypedValue = @import("TypedValue.zig"); +const link = @import("link.zig"); const Target = std.Target; const Allocator = mem.Allocator; -pub fn generateSymbol(typed_value: TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !?*ir.ErrorMsg { +pub fn generateSymbol(bin_file: *link.ElfFile, typed_value: TypedValue, code: *std.ArrayList(u8)) !?*ir.ErrorMsg { switch (typed_value.ty.zigTypeTag()) { .Fn => { const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; var function = Function{ - .module = &module, + .target = &bin_file.options.target, .mod_fn = module_fn, .code = code, .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator), @@ -22,7 +23,7 @@ pub fn generateSymbol(typed_value: TypedValue, module: ir.Module, code: *std.Arr }; defer function.inst_table.deinit(); - for (module_fn.body.instructions) |inst| { + for (module_fn.analysis.success.instructions) |inst| { const new_inst = function.genFuncInst(inst) catch |err| switch (err) { error.CodegenFail => { assert(function.err_msg != null); @@ -40,7 +41,7 @@ pub fn generateSymbol(typed_value: TypedValue, module: ir.Module, code: *std.Arr } const Function = struct { - module: *const ir.Module, + target: *const std.Target, mod_fn: *const ir.Module.Fn, code: *std.ArrayList(u8), inst_table: std.AutoHashMap(*ir.Inst, MCValue), @@ -76,60 +77,60 @@ const Function = struct { } fn genBreakpoint(self: *Function, src: usize) !MCValue { - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .i386, .x86_64 => { try self.code.append(0xcc); // int3 }, - else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.module.target.cpu.arch}), + else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.target.cpu.arch}), } return .unreach; } fn genCall(self: *Function, inst: *ir.Inst.Call) !MCValue { - switch (self.module.target.cpu.arch) { - else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.module.target.cpu.arch}), + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}), } return .unreach; } fn genRet(self: *Function, inst: *ir.Inst.Ret) !MCValue { - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .i386, .x86_64 => { try self.code.append(0xc3); // ret }, - else => return self.fail(inst.base.src, "TODO implement return for {}", .{self.module.target.cpu.arch}), + else => return self.fail(inst.base.src, "TODO implement return for {}", .{self.target.cpu.arch}), } return .unreach; } fn genCmp(self: *Function, inst: *ir.Inst.Cmp) !MCValue { - switch (self.module.target.cpu.arch) { - else => return 
self.fail(inst.base.src, "TODO implement cmp for {}", .{self.module.target.cpu.arch}), + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement cmp for {}", .{self.target.cpu.arch}), } } fn genCondBr(self: *Function, inst: *ir.Inst.CondBr) !MCValue { - switch (self.module.target.cpu.arch) { - else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.module.target.cpu.arch}), + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}), } } fn genIsNull(self: *Function, inst: *ir.Inst.IsNull) !MCValue { - switch (self.module.target.cpu.arch) { - else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.module.target.cpu.arch}), + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.target.cpu.arch}), } } fn genIsNonNull(self: *Function, inst: *ir.Inst.IsNonNull) !MCValue { // Here you can specialize this instruction if it makes sense to, otherwise the default // will call genIsNull and invert the result. - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { else => return self.fail(inst.base.src, "TODO call genIsNull and invert the result ", .{}), } } fn genRelativeFwdJump(self: *Function, src: usize, amount: u32) !void { - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .i386, .x86_64 => { // TODO x86 treats the operands as signed if (amount <= std.math.maxInt(u8)) { @@ -143,13 +144,13 @@ const Function = struct { mem.writeIntLittle(u32, imm_ptr, amount); } }, - else => return self.fail(src, "TODO implement relative forward jump for {}", .{self.module.target.cpu.arch}), + else => return self.fail(src, "TODO implement relative forward jump for {}", .{self.target.cpu.arch}), } } fn genAsm(self: *Function, inst: *ir.Inst.Assembly) !MCValue { // TODO convert to inline function - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .arm => return self.genAsmArch(.arm, inst), .armeb => return self.genAsmArch(.armeb, inst), .aarch64 => return self.genAsmArch(.aarch64, inst), @@ -388,7 +389,7 @@ const Function = struct { } } - fn genTypedValue(self: *Function, src: usize, typed_value: ir.TypedValue) !MCValue { + fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue { switch (typed_value.ty.zigTypeTag()) { .Pointer => { const ptr_elem_type = typed_value.ty.elemType(); @@ -410,8 +411,8 @@ const Function = struct { } }, .Int => { - const info = typed_value.ty.intInfo(self.module.target); - const ptr_bits = self.module.target.cpu.arch.ptrBitWidth(); + const info = typed_value.ty.intInfo(self.target.*); + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); if (info.bits > ptr_bits or info.signed) { return self.fail(src, "TODO const int bigger than ptr and signed int", .{}); } diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index 641b2a504d..e4cd0cd0ef 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -685,23 +685,20 @@ pub const ElfFile = struct { // TODO Also detect virtual address collisions. const text_capacity = self.allocatedSize(shdr.sh_offset); // TODO instead of looping here, maintain a free list and a pointer to the end. 
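// A sketch of the free-list idea in the TODO above (hypothetical names,
// not part of this patch): track the end of the text segment directly so
// allocation is O(1) instead of rescanning every symbol.
//   last_text_end: u64, // bumped whenever a block is allocated past the end
//   text_free_list: std.ArrayListUnmanaged(AllocatedBlock) = .{},
// Freeing a block would push it onto `text_free_list`; allocation would
// pop a large-enough block before falling back to bumping `last_text_end`.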
- const end_vaddr = blk: { - var start: u64 = 0; - var size: u64 = 0; - for (self.symbols.items) |sym| { - if (sym.st_value > start) { - start = sm.st_value; - size = sym.st_size; - } + var last_start: u64 = 0; + var last_size: u64 = 0; + for (self.symbols.items) |sym| { + if (sym.st_value > last_start) { + last_start = sym.st_value; + last_size = sym.st_size; } - break :blk start + (size * alloc_num / alloc_den); - }; - - const text_size = end_vaddr - phdr.p_vaddr; - const needed_size = text_size + new_block_size; + } + const end_vaddr = last_start + (last_size * alloc_num / alloc_den); + const needed_size = (end_vaddr + new_block_size) - phdr.p_vaddr; if (needed_size > text_capacity) { // Must move the entire text section. const new_offset = self.findFreeSpace(needed_size, 0x1000); + const text_size = (last_start + last_size) - phdr.p_vaddr; const amt = try self.file.copyRangeAll(shdr.sh_offset, self.file, new_offset, text_size); if (amt != text_size) return error.InputOutput; shdr.sh_offset = new_offset; @@ -713,6 +710,12 @@ pub const ElfFile = struct { self.phdr_table_dirty = true; // TODO look into making only the one program header dirty self.shdr_table_dirty = true; // TODO look into making only the one section dirty + + return AllocatedBlock{ + .vaddr = end_vaddr, + .file_offset = shdr.sh_offset + (end_vaddr - phdr.p_vaddr), + .size_capacity = text_capacity - end_vaddr, + }; } fn findAllocatedTextBlock(self: *ElfFile, sym: elf.Elf64_Sym) AllocatedBlock { @@ -739,8 +742,8 @@ pub const ElfFile = struct { defer code.deinit(); const typed_value = decl.typed_value.most_recent.typed_value; - const err_msg = try codegen.generateSymbol(typed_value, module.*, &code); - if (err_msg != null) |em| { + const err_msg = try codegen.generateSymbol(self, typed_value, &code); + if (err_msg) |em| { decl.analysis = .codegen_failure; _ = try module.failed_decls.put(decl, em); return; @@ -755,7 +758,7 @@ pub const ElfFile = struct { if (decl.link.local_sym_index != 0) { const local_sym = &self.symbols.items[decl.link.local_sym_index]; - const existing_block = self.findAllocatedTextBlock(local_sym); + const existing_block = self.findAllocatedTextBlock(local_sym.*); const file_offset = if (code_size > existing_block.size_capacity) fo: { const new_block = try self.allocateTextBlock(code_size); local_sym.st_value = new_block.vaddr; @@ -765,7 +768,7 @@ pub const ElfFile = struct { break :fo new_block.file_offset; } else existing_block.file_offset; - local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(u8, decl.name)); + local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(decl.name)); local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits; // TODO this write could be avoided if no fields of the symbol were changed. 
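// A sketch of the dirty-check the TODO above describes (illustrative only;
// assumes the symbol's previous contents were saved before mutation):
//   if (!std.meta.eql(prev_sym, local_sym.*))
//       try self.writeSymbol(decl.link.local_sym_index);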
try self.writeSymbol(decl.link.local_sym_index); @@ -773,7 +776,7 @@ pub const ElfFile = struct { } else { try self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + 1); try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); - const decl_name = mem.spanZ(u8, decl.name); + const decl_name = mem.spanZ(decl.name); const name_str_index = try self.makeString(decl_name); const new_block = try self.allocateTextBlock(code_size); const local_sym_index = self.symbols.items.len; @@ -796,8 +799,8 @@ pub const ElfFile = struct { self.symbol_count_dirty = true; self.offset_table_count_dirty = true; decl.link = .{ - .local_sym_index = local_sym_index, - .offset_table_index = offset_table_index, + .local_sym_index = @intCast(u32, local_sym_index), + .offset_table_index = @intCast(u32, offset_table_index), }; break :blk new_block.file_offset; @@ -807,7 +810,7 @@ pub const ElfFile = struct { try self.file.pwriteAll(code.items, file_offset); // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. - const decl_exports = module.decl_exports.get(decl) orelse &[0]*ir.Module.Export{}; + const decl_exports = module.decl_exports.getValue(decl) orelse &[0]*ir.Module.Export{}; return self.updateDeclExports(module, decl, decl_exports); } @@ -938,9 +941,9 @@ pub const ElfFile = struct { const needed_size = self.symbols.items.len * shdr.sh_entsize; if (needed_size > allocated_size) { // Must move the entire got section. - const new_offset = self.findFreeSpace(needed_size, shdr.sh_entsize); + const new_offset = self.findFreeSpace(needed_size, @intCast(u16, shdr.sh_entsize)); const amt = try self.file.copyRangeAll(shdr.sh_offset, self.file, new_offset, shdr.sh_size); - if (amt != text_size) return error.InputOutput; + if (amt != shdr.sh_size) return error.InputOutput; shdr.sh_offset = new_offset; } shdr.sh_size = needed_size; From fb947c365e95298a4619b8db2c0d40b9d69172f2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 13 May 2020 22:20:31 -0400 Subject: [PATCH 09/31] work around stage1 compiler bug breaking from inside the block with defers in scope triggered broken LLVM module found: Terminator found in the middle of a basic block! --- src-self-hosted/ir.zig | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index d7a2228f74..6efc3cdbc3 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -697,16 +697,9 @@ pub const Module = struct { }; } - fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void { - // TODO use the cache to identify, from the modified source files, the decls which have - // changed based on the span of memory that represents the decl in the re-parsed source file. - // Use the cached dependency graph to recursively determine the set of decls which need - // regeneration. - // Here we simulate adding a source file which was previously not part of the compilation, - // which means scanning the decls looking for exports. - // TODO also identify decls that need to be deleted. 
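// Minimal sketch of the code shape that triggered the stage1 bug this
// patch works around (illustrative, not code from this repository):
// breaking out of a labeled block while a defer is in scope produced
// "Terminator found in the middle of a basic block!".
//   const src_module = switch (root_scope.status) {
//       .unloaded => blk: {
//           var file = try openFile(); // hypothetical helper
//           defer file.close();        // defer in scope of the block...
//           break :blk try parse(file); // ...plus break :blk hit the bug
//       },
//       else => root_scope.contents.module,
//   };
// Hoisting the block into its own function (getTextModule below) avoids it.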
- const src_module = switch (root_scope.status) { - .unloaded => blk: { + fn getTextModule(self: *Module, root_scope: *Scope.ZIRModule) !*text.Module { + switch (root_scope.status) { + .unloaded => { try self.failed_files.ensureCapacity(self.failed_files.size + 1); var keep_source = false; @@ -743,12 +736,23 @@ pub const Module = struct { root_scope.contents = .{ .module = zir_module }; keep_zir_module = true; - break :blk zir_module; + return zir_module; }, .unloaded_parse_failure, .loaded_parse_failure => return error.AnalysisFail, - .loaded_success => root_scope.contents.module, - }; + .loaded_success => return root_scope.contents.module, + } + } + + fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void { + // TODO use the cache to identify, from the modified source files, the decls which have + // changed based on the span of memory that represents the decl in the re-parsed source file. + // Use the cached dependency graph to recursively determine the set of decls which need + // regeneration. + // Here we simulate adding a source file which was previously not part of the compilation, + // which means scanning the decls looking for exports. + // TODO also identify decls that need to be deleted. + const src_module = try self.getTextModule(root_scope); // Here we ensure enough queue capacity to store all the decls, so that later we can use // appendAssumeCapacity. From 0986dcf1cf5b67ad2b2e606622bf9b2c22e01194 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 14 May 2020 13:20:27 -0400 Subject: [PATCH 10/31] self-hosted: fix codegen and resolve some analysis bugs --- lib/std/fifo.zig | 37 ++++++----- src-self-hosted/codegen.zig | 87 +++++++++++++++++++++--- src-self-hosted/ir.zig | 127 ++++++++++++++++++++++++++++-------- src-self-hosted/ir/text.zig | 34 +++++++--- src-self-hosted/link.zig | 43 +++++++----- src-self-hosted/type.zig | 44 +++++++++++++ src-self-hosted/value.zig | 9 ++- 7 files changed, 302 insertions(+), 79 deletions(-) diff --git a/lib/std/fifo.zig b/lib/std/fifo.zig index 6bbec57072..de75130363 100644 --- a/lib/std/fifo.zig +++ b/lib/std/fifo.zig @@ -191,8 +191,8 @@ pub fn LinearFifo( } /// Read the next item from the fifo - pub fn readItem(self: *Self) !T { - if (self.count == 0) return error.EndOfStream; + pub fn readItem(self: *Self) ?T { + if (self.count == 0) return null; const c = self.buf[self.head]; self.discard(1); @@ -282,7 +282,10 @@ pub fn LinearFifo( /// Write a single item to the fifo pub fn writeItem(self: *Self, item: T) !void { try self.ensureUnusedCapacity(1); + return self.writeItemAssumeCapacity(item); + } + pub fn writeItemAssumeCapacity(self: *Self, item: T) void { var tail = self.head + self.count; if (powers_of_two) { tail &= self.buf.len - 1; @@ -342,10 +345,10 @@ pub fn LinearFifo( } } - /// Peek at the item at `offset` - pub fn peekItem(self: Self, offset: usize) error{EndOfStream}!T { - if (offset >= self.count) - return error.EndOfStream; + /// Returns the item at `offset`. + /// Asserts offset is within bounds. 
+ pub fn peekItem(self: Self, offset: usize) T { + assert(offset < self.count); var index = self.head + offset; if (powers_of_two) { @@ -369,18 +372,18 @@ test "LinearFifo(u8, .Dynamic)" { { var i: usize = 0; while (i < 5) : (i += 1) { - try fifo.write(&[_]u8{try fifo.peekItem(i)}); + try fifo.write(&[_]u8{fifo.peekItem(i)}); } testing.expectEqual(@as(usize, 10), fifo.readableLength()); testing.expectEqualSlices(u8, "HELLOHELLO", fifo.readableSlice(0)); } { - testing.expectEqual(@as(u8, 'H'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'E'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'L'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'L'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'O'), try fifo.readItem()); + testing.expectEqual(@as(u8, 'H'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'E'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'L'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'L'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'O'), fifo.readItem().?); } testing.expectEqual(@as(usize, 5), fifo.readableLength()); @@ -451,11 +454,11 @@ test "LinearFifo" { testing.expectEqual(@as(usize, 5), fifo.readableLength()); { - testing.expectEqual(@as(T, 0), try fifo.readItem()); - testing.expectEqual(@as(T, 1), try fifo.readItem()); - testing.expectEqual(@as(T, 1), try fifo.readItem()); - testing.expectEqual(@as(T, 0), try fifo.readItem()); - testing.expectEqual(@as(T, 1), try fifo.readItem()); + testing.expectEqual(@as(T, 0), fifo.readItem().?); + testing.expectEqual(@as(T, 1), fifo.readItem().?); + testing.expectEqual(@as(T, 1), fifo.readItem().?); + testing.expectEqual(@as(T, 0), fifo.readItem().?); + testing.expectEqual(@as(T, 1), fifo.readItem().?); testing.expectEqual(@as(usize, 0), fifo.readableLength()); } diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 09f722c1f1..176bdc1128 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -9,7 +9,18 @@ const link = @import("link.zig"); const Target = std.Target; const Allocator = mem.Allocator; -pub fn generateSymbol(bin_file: *link.ElfFile, typed_value: TypedValue, code: *std.ArrayList(u8)) !?*ir.ErrorMsg { +pub const Result = union(enum) { + /// This value might or might not alias the `code` parameter passed to `generateSymbol`. + ok: []const u8, + fail: *ir.ErrorMsg, +}; + +pub fn generateSymbol( + bin_file: *link.ElfFile, + src: usize, + typed_value: TypedValue, + code: *std.ArrayList(u8), +) error{OutOfMemory}!Result { switch (typed_value.ty.zigTypeTag()) { .Fn => { const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; @@ -18,25 +29,77 @@ pub fn generateSymbol(bin_file: *link.ElfFile, typed_value: TypedValue, code: *s .target = &bin_file.options.target, .mod_fn = module_fn, .code = code, - .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator), + .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator), .err_msg = null, }; defer function.inst_table.deinit(); for (module_fn.analysis.success.instructions) |inst| { const new_inst = function.genFuncInst(inst) catch |err| switch (err) { - error.CodegenFail => { - assert(function.err_msg != null); - break; - }, + error.CodegenFail => return Result{ .fail = function.err_msg.? 
}, else => |e| return e, }; try function.inst_table.putNoClobber(inst, new_inst); } - return function.err_msg; + if (function.err_msg) |em| { + return Result{ .fail = em }; + } else { + return Result{ .ok = code.items }; + } + }, + .Array => { + if (typed_value.val.cast(Value.Payload.Bytes)) |payload| { + return Result{ .ok = payload.data }; + } + return Result{ + .fail = try ir.ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for more kinds of arrays", + .{}, + ), + }; + }, + .Pointer => { + if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { + const decl = payload.decl; + assert(decl.link.local_sym_index != 0); + // TODO handle the dependency of this symbol on the decl's vaddr. + // If the decl changes vaddr, then this symbol needs to get regenerated. + const vaddr = bin_file.symbols.items[decl.link.local_sym_index].st_value; + const endian = bin_file.options.target.cpu.arch.endian(); + switch (bin_file.ptr_width) { + .p32 => { + try code.resize(4); + mem.writeInt(u32, code.items[0..4], @intCast(u32, vaddr), endian); + }, + .p64 => { + try code.resize(8); + mem.writeInt(u64, code.items[0..8], vaddr, endian); + }, + } + return Result{ .ok = code.items }; + } + return Result{ + .fail = try ir.ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for pointer {}", + .{typed_value.val}, + ), + }; + }, + else => |t| { + return Result{ + .fail = try ir.ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for type '{}'", + .{@tagName(t)}, + ), + }; }, - else => @panic("TODO implement generateSymbol for non-function decls"), } } @@ -390,14 +453,18 @@ const Function = struct { } fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue { + const allocator = self.code.allocator; switch (typed_value.ty.zigTypeTag()) { .Pointer => { const ptr_elem_type = typed_value.ty.elemType(); switch (ptr_elem_type.zigTypeTag()) { .Array => { // TODO more checks to make sure this can be emitted as a string literal - const bytes = try typed_value.val.toAllocatedBytes(self.code.allocator); - defer self.code.allocator.free(bytes); + const bytes = typed_value.val.toAllocatedBytes(allocator) catch |err| switch (err) { + error.AnalysisFail => unreachable, + else => |e| return e, + }; + defer allocator.free(bytes); const smaller_len = std.math.cast(u32, bytes.len) catch return self.fail(src, "TODO handle a larger string constant", .{}); diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 6efc3cdbc3..17a72c4ee0 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -191,7 +191,7 @@ pub const Module = struct { optimize_mode: std.builtin.Mode, link_error_flags: link.ElfFile.ErrorFlags = link.ElfFile.ErrorFlags{}, - work_stack: ArrayListUnmanaged(WorkItem) = ArrayListUnmanaged(WorkItem){}, + work_queue: std.fifo.LinearFifo(WorkItem, .Dynamic), /// We optimize memory usage for a compilation with no compile errors by storing the /// error messages and mapping outside of `Decl`. 
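// Usage sketch for the new FIFO-based work queue (illustrative;
// `codegenDecl` is a hypothetical stand-in for the real dispatch).
// Replacing the old work stack with a FIFO means queued decls are
// processed in the order they were discovered rather than LIFO:
//   var queue = std.fifo.LinearFifo(WorkItem, .Dynamic).init(allocator);
//   defer queue.deinit();
//   try queue.writeItem(.{ .codegen_decl = decl });
//   while (queue.readItem()) |item| switch (item) {
//       .codegen_decl => |d| try codegenDecl(d),
//   };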
@@ -333,6 +333,15 @@ pub const Module = struct { return (try self.typedValue()).val; } + pub fn dump(self: *Decl) void { + self.scope.dumpSrc(self.src); + std.debug.warn(" name={} status={}", .{ mem.spanZ(self.name), @tagName(self.analysis) }); + if (self.typedValueManaged()) |tvm| { + std.debug.warn(" ty={} val={}", .{ tvm.typed_value.ty, tvm.typed_value.val }); + } + std.debug.warn("\n", .{}); + } + fn typedValueManaged(self: *Decl) ?*TypedValue.Managed { switch (self.analysis) { .initial_in_progress, @@ -359,7 +368,10 @@ pub const Module = struct { queued: *text.Inst.Fn, in_progress: *Analysis, /// There will be a corresponding ErrorMsg in Module.failed_decls - failure, + sema_failure, + /// This Fn might be OK but it depends on another Decl which did not successfully complete + /// semantic analysis. + dependency_failure, success: Body, }, @@ -390,7 +402,7 @@ pub const Module = struct { switch (self.tag) { .block => return self.cast(Block).?.arena, .decl => return &self.cast(DeclAnalysis).?.arena.allocator, - .zir_module => unreachable, + .zir_module => return &self.cast(ZIRModule).?.contents.module.arena.allocator, } } @@ -414,6 +426,18 @@ pub const Module = struct { } } + pub fn dumpInst(self: *Scope, inst: *Inst) void { + const zir_module = self.namespace(); + const loc = std.zig.findLineColumn(zir_module.source.bytes, inst.src); + std.debug.warn("{}:{}:{}: {}: ty={}\n", .{ + zir_module.sub_file_path, + loc.line + 1, + loc.column + 1, + @tagName(inst.tag), + inst.ty, + }); + } + pub const Tag = enum { zir_module, block, @@ -438,6 +462,7 @@ pub const Module = struct { unloaded, unloaded_parse_failure, loaded_parse_failure, + loaded_sema_failure, loaded_success, }, @@ -446,7 +471,7 @@ pub const Module = struct { .unloaded, .unloaded_parse_failure, => {}, - .loaded_success => { + .loaded_success, .loaded_sema_failure => { allocator.free(self.source.bytes); self.contents.module.deinit(allocator); }, @@ -456,6 +481,11 @@ pub const Module = struct { } self.* = undefined; } + + pub fn dumpSrc(self: *ZIRModule, src: usize) void { + const loc = std.zig.findLineColumn(self.source.bytes, src); + std.debug.warn("{}:{}:{}\n", .{ self.sub_file_path, loc.line + 1, loc.column + 1 }); + } }; /// This is a temporary structure, references to it are valid only @@ -520,7 +550,7 @@ pub const Module = struct { pub fn deinit(self: *Module) void { const allocator = self.allocator; - self.work_stack.deinit(allocator); + self.work_queue.deinit(); { var it = self.decl_table.iterator(); while (it.next()) |kv| { @@ -587,6 +617,8 @@ pub const Module = struct { try self.performAllTheWork(); + // TODO unload all the source files from memory + try self.bin_file.flush(); self.link_error_flags = self.bin_file.error_flags; } @@ -654,7 +686,7 @@ pub const Module = struct { const InnerError = error{ OutOfMemory, AnalysisFail }; pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void { - while (self.work_stack.popOrNull()) |work_item| switch (work_item) { + while (self.work_queue.readItem()) |work_item| switch (work_item) { .codegen_decl => |decl| switch (decl.analysis) { .initial_in_progress, .repeat_in_progress, @@ -671,14 +703,22 @@ pub const Module = struct { if (decl.typed_value.most_recent.typed_value.val.cast(Value.Payload.Function)) |payload| { switch (payload.func.analysis) { .queued => self.analyzeFnBody(decl, payload.func) catch |err| switch (err) { - error.AnalysisFail => continue, + error.AnalysisFail => { + if (payload.func.analysis == .queued) { + payload.func.analysis = .dependency_failure; + } + 
continue; + }, else => |e| return e, }, .in_progress => unreachable, - .failure => continue, + .sema_failure, .dependency_failure => continue, .success => {}, } } + if (!decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()) + continue; + self.bin_file.updateDecl(self, decl) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, else => { @@ -739,7 +779,10 @@ pub const Module = struct { return zir_module; }, - .unloaded_parse_failure, .loaded_parse_failure => return error.AnalysisFail, + .unloaded_parse_failure, + .loaded_parse_failure, + .loaded_sema_failure, + => return error.AnalysisFail, .loaded_success => return root_scope.contents.module, } } @@ -756,14 +799,11 @@ pub const Module = struct { // Here we ensure enough queue capacity to store all the decls, so that later we can use // appendAssumeCapacity. - try self.work_stack.ensureCapacity( - self.allocator, - self.work_stack.items.len + src_module.decls.len, - ); + try self.work_queue.ensureUnusedCapacity(src_module.decls.len); for (src_module.decls) |decl| { if (decl.cast(text.Inst.Export)) |export_inst| { - try analyzeExport(self, &root_scope.base, export_inst); + _ = try self.resolveDecl(&root_scope.base, &export_inst.base); } } } @@ -825,10 +865,19 @@ pub const Module = struct { }; errdefer decl_scope.arena.deinit(); + const typed_value = self.analyzeInstConst(&decl_scope.base, old_inst) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => { + switch (new_decl.analysis) { + .initial_in_progress => new_decl.analysis = .initial_dependency_failure, + .repeat_in_progress => new_decl.analysis = .repeat_dependency_failure, + else => {}, + } + return error.AnalysisFail; + }, + }; const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); - const typed_value = try self.analyzeInstConst(&decl_scope.base, old_inst); - arena_state.* = decl_scope.arena.state; new_decl.typed_value = .{ @@ -839,7 +888,7 @@ pub const Module = struct { }; new_decl.analysis = .complete; // We ensureCapacity when scanning for decls. 
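// The reserve-then-assume pattern at work in this hunk, shown together
// for clarity (both calls appear elsewhere in this patch): the fallible
// reservation happens up front, so the later append cannot fail
// mid-analysis.
//   try self.work_queue.ensureUnusedCapacity(src_module.decls.len);
//   ...
//   self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl });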
- self.work_stack.appendAssumeCapacity(.{ .codegen_decl = new_decl }); + self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); return new_decl; } } @@ -1021,11 +1070,8 @@ pub const Module = struct { } fn constStr(self: *Module, scope: *Scope, src: usize, str: []const u8) !*Inst { - const array_payload = try scope.arena().create(Type.Payload.Array_u8_Sentinel0); - array_payload.* = .{ .len = str.len }; - - const ty_payload = try scope.arena().create(Type.Payload.SingleConstPointer); - ty_payload.* = .{ .pointee_type = Type.initPayload(&array_payload.base) }; + const ty_payload = try scope.arena().create(Type.Payload.Array_u8_Sentinel0); + ty_payload.* = .{ .len = str.len }; const bytes_payload = try scope.arena().create(Value.Payload.Bytes); bytes_payload.* = .{ .data = str }; @@ -1150,6 +1196,7 @@ pub const Module = struct { return self.constVoid(scope, old_inst.src); }, .primitive => return self.analyzeInstPrimitive(scope, old_inst.cast(text.Inst.Primitive).?), + .ref => return self.analyzeInstRef(scope, old_inst.cast(text.Inst.Ref).?), .fntype => return self.analyzeInstFnType(scope, old_inst.cast(text.Inst.FnType).?), .intcast => return self.analyzeInstIntCast(scope, old_inst.cast(text.Inst.IntCast).?), .bitcast => return self.analyzeInstBitCast(scope, old_inst.cast(text.Inst.BitCast).?), @@ -1167,12 +1214,34 @@ pub const Module = struct { return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, Inst.Args(Inst.Breakpoint){}); } + fn analyzeInstRef(self: *Module, scope: *Scope, inst: *text.Inst.Ref) InnerError!*Inst { + const decl = try self.resolveCompleteDecl(scope, inst.positionals.operand); + return self.analyzeDeclRef(scope, inst.base.src, decl); + } + fn analyzeInstDeclRef(self: *Module, scope: *Scope, inst: *text.Inst.DeclRef) InnerError!*Inst { - return self.fail(scope, inst.base.src, "TODO implement analyzeInstDeclFef", .{}); + const decl_name = try self.resolveConstString(scope, inst.positionals.name); + // This will need to get more fleshed out when there are proper structs & namespaces. 
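// What the loop below does, in sketch form: a linear scan of the ZIR
// module's top-level decls, comparing names. With proper namespaces this
// would presumably become a map lookup (hypothetical):
//   if (namespace.decl_map.get(decl_name)) |src_decl| {
//       const decl = try self.resolveCompleteDecl(scope, src_decl);
//       return self.analyzeDeclRef(scope, inst.base.src, decl);
//   }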
+ const zir_module = scope.namespace(); + for (zir_module.contents.module.decls) |src_decl| { + if (mem.eql(u8, src_decl.name, decl_name)) { + const decl = try self.resolveCompleteDecl(scope, src_decl); + return self.analyzeDeclRef(scope, inst.base.src, decl); + } + } + return self.fail(scope, inst.positionals.name.src, "use of undeclared identifier '{}'", .{decl_name}); } fn analyzeDeclRef(self: *Module, scope: *Scope, src: usize, decl: *Decl) InnerError!*Inst { - return self.fail(scope, src, "TODO implement analyzeDeclRef", .{}); + const decl_tv = try decl.typedValue(); + const ty_payload = try scope.arena().create(Type.Payload.SingleConstPointer); + ty_payload.* = .{ .pointee_type = decl_tv.ty }; + const val_payload = try scope.arena().create(Value.Payload.DeclRef); + val_payload.* = .{ .decl = decl }; + return self.constInst(scope, src, .{ + .ty = Type.initPayload(&ty_payload.base), + .val = Value.initPayload(&val_payload.base), + }); } fn analyzeInstCall(self: *Module, scope: *Scope, inst: *text.Inst.Call) InnerError!*Inst { @@ -1929,6 +1998,7 @@ pub const Module = struct { fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError { @setCold(true); try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + try self.failed_files.ensureCapacity(self.failed_files.size + 1); const err_msg = try ErrorMsg.create(self.allocator, src, format, args); switch (scope.tag) { .decl => { @@ -1942,10 +2012,14 @@ pub const Module = struct { }, .block => { const block = scope.cast(Scope.Block).?; - block.func.analysis = .failure; + block.func.analysis = .sema_failure; self.failed_decls.putAssumeCapacityNoClobber(block.decl, err_msg); }, - .zir_module => unreachable, + .zir_module => { + const zir_module = scope.cast(Scope.ZIRModule).?; + zir_module.status = .loaded_sema_failure; + self.failed_files.putAssumeCapacityNoClobber(zir_module, err_msg); + }, } return error.AnalysisFail; } @@ -2044,6 +2118,7 @@ pub fn main() anyerror!void { .failed_decls = std.AutoHashMap(*Module.Decl, *ErrorMsg).init(allocator), .failed_files = std.AutoHashMap(*Module.Scope.ZIRModule, *ErrorMsg).init(allocator), .failed_exports = std.AutoHashMap(*Module.Export, *ErrorMsg).init(allocator), + .work_queue = std.fifo.LinearFifo(Module.WorkItem, .Dynamic).init(allocator), }; }; defer module.deinit(); diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index 6c64c6c9de..8f189f49e3 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -24,8 +24,7 @@ pub const Inst = struct { breakpoint, call, /// Represents a reference to a global decl by name. - /// Canonicalized ZIR will not have any of these. The - /// syntax `@foo` is equivalent to `declref("foo")`. + /// The syntax `@foo` is equivalent to `declref("foo")`. 
declref, str, int, @@ -39,6 +38,7 @@ pub const Inst = struct { @"fn", @"export", primitive, + ref, fntype, intcast, bitcast, @@ -67,6 +67,7 @@ pub const Inst = struct { .@"fn" => Fn, .@"export" => Export, .primitive => Primitive, + .ref => Ref, .fntype => FnType, .intcast => IntCast, .bitcast => BitCast, @@ -234,6 +235,16 @@ pub const Inst = struct { kw_args: struct {}, }; + pub const Ref = struct { + pub const base_tag = Tag.ref; + base: Inst, + + positionals: struct { + operand: *Inst, + }, + kw_args: struct {}, + }; + pub const Primitive = struct { pub const base_tag = Tag.primitive; base: Inst, @@ -407,7 +418,7 @@ pub const ErrorMsg = struct { pub const Module = struct { decls: []*Inst, - arena: std.heap.ArenaAllocator.State, + arena: std.heap.ArenaAllocator, error_msg: ?ErrorMsg = null, pub const Body = struct { @@ -416,7 +427,7 @@ pub const Module = struct { pub fn deinit(self: *Module, allocator: *Allocator) void { allocator.free(self.decls); - self.arena.promote(allocator).deinit(); + self.arena.deinit(); self.* = undefined; } @@ -475,6 +486,7 @@ pub const Module = struct { .@"return" => return self.writeInstToStreamGeneric(stream, .@"return", decl, inst_table), .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table), .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table), + .ref => return self.writeInstToStreamGeneric(stream, .ref, decl, inst_table), .primitive => return self.writeInstToStreamGeneric(stream, .primitive, decl, inst_table), .fntype => return self.writeInstToStreamGeneric(stream, .fntype, decl, inst_table), .intcast => return self.writeInstToStreamGeneric(stream, .intcast, decl, inst_table), @@ -591,7 +603,7 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module return Module{ .decls = parser.decls.toOwnedSlice(allocator), - .arena = parser.arena.state, + .arena = parser.arena, .error_msg = parser.error_msg, }; } @@ -630,7 +642,7 @@ const Parser = struct { skipSpace(self); try requireEatBytes(self, "="); skipSpace(self); - const inst = try parseInstruction(self, &body_context, ident[1..]); + const inst = try parseInstruction(self, &body_context, ident); const ident_index = body_context.instructions.items.len; if (try body_context.name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); @@ -716,7 +728,7 @@ const Parser = struct { skipSpace(self); try requireEatBytes(self, "="); skipSpace(self); - const inst = try parseInstruction(self, null, ident[1..]); + const inst = try parseInstruction(self, null, ident); const ident_index = self.decls.items.len; if (try self.global_name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); @@ -987,7 +999,7 @@ pub fn emit_zir(allocator: *Allocator, old_module: ir.Module) !Module { return Module{ .decls = ctx.decls.toOwnedSlice(allocator), - .arena = ctx.arena.state, + .arena = ctx.arena, }; } @@ -1056,6 +1068,7 @@ const EmitZIR = struct { } fn emitTypedValue(self: *EmitZIR, src: usize, typed_value: TypedValue) Allocator.Error!*Inst { + const allocator = &self.arena.allocator; switch (typed_value.ty.zigTypeTag()) { .Pointer => { const ptr_elem_type = typed_value.ty.elemType(); @@ -1067,7 +1080,10 @@ const EmitZIR = struct { // ptr_elem_type.hasSentinel(Value.initTag(.zero))) //{ //} - const bytes = try typed_value.val.toAllocatedBytes(&self.arena.allocator); + const bytes = typed_value.val.toAllocatedBytes(allocator) catch |err| switch (err) { + 
error.AnalysisFail => unreachable, + else => |e| return e, + }; return self.emitStringLiteral(src, bytes); }, else => |t| std.debug.panic("TODO implement emitTypedValue for pointer to {}", .{@tagName(t)}), diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index e4cd0cd0ef..c0110d8ae0 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -33,9 +33,11 @@ pub fn openBinFilePath( options: Options, ) !ElfFile { const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = determineMode(options) }); - defer file.close(); + errdefer file.close(); - return openBinFile(allocator, file, options); + var bin_file = try openBinFile(allocator, file, options); + bin_file.owns_file_handle = true; + return bin_file; } /// Atomically overwrites the old file, if present. @@ -89,6 +91,7 @@ pub fn openBinFile(allocator: *Allocator, file: fs.File, options: Options) !ElfF pub const ElfFile = struct { allocator: *Allocator, file: fs.File, + owns_file_handle: bool, options: Options, ptr_width: enum { p32, p64 }, @@ -162,6 +165,8 @@ pub const ElfFile = struct { self.shstrtab.deinit(self.allocator); self.symbols.deinit(self.allocator); self.offset_table.deinit(self.allocator); + if (self.owns_file_handle) + self.file.close(); } // `alloc_num / alloc_den` is the factor of padding when allocation @@ -685,7 +690,7 @@ pub const ElfFile = struct { // TODO Also detect virtual address collisions. const text_capacity = self.allocatedSize(shdr.sh_offset); // TODO instead of looping here, maintain a free list and a pointer to the end. - var last_start: u64 = 0; + var last_start: u64 = phdr.p_vaddr; var last_size: u64 = 0; for (self.symbols.items) |sym| { if (sym.st_value > last_start) { @@ -738,19 +743,21 @@ pub const ElfFile = struct { } pub fn updateDecl(self: *ElfFile, module: *ir.Module, decl: *ir.Module.Decl) !void { - var code = std.ArrayList(u8).init(self.allocator); - defer code.deinit(); + var code_buffer = std.ArrayList(u8).init(self.allocator); + defer code_buffer.deinit(); const typed_value = decl.typed_value.most_recent.typed_value; - const err_msg = try codegen.generateSymbol(self, typed_value, &code); - if (err_msg) |em| { - decl.analysis = .codegen_failure; - _ = try module.failed_decls.put(decl, em); - return; - } + const code = switch (try codegen.generateSymbol(self, decl.src, typed_value, &code_buffer)) { + .ok => |x| x, + .fail => |em| { + decl.analysis = .codegen_failure; + _ = try module.failed_decls.put(decl, em); + return; + }, + }; const file_offset = blk: { - const code_size = code.items.len; + const code_size = code.len; const stt_bits: u8 = switch (typed_value.ty.zigTypeTag()) { .Fn => elf.STT_FUNC, else => elf.STT_OBJECT, @@ -793,11 +800,13 @@ pub const ElfFile = struct { errdefer self.symbols.shrink(self.allocator, self.symbols.items.len - 1); self.offset_table.appendAssumeCapacity(new_block.vaddr); errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); - try self.writeSymbol(local_sym_index); - try self.writeOffsetTableEntry(offset_table_index); self.symbol_count_dirty = true; self.offset_table_count_dirty = true; + + try self.writeSymbol(local_sym_index); + try self.writeOffsetTableEntry(offset_table_index); + decl.link = .{ .local_sym_index = @intCast(u32, local_sym_index), .offset_table_index = @intCast(u32, offset_table_index), @@ -807,7 +816,7 @@ pub const ElfFile = struct { } }; - try self.file.pwriteAll(code.items, file_offset); + try self.file.pwriteAll(code, file_offset); // Since we updated the vaddr 
and the size, each corresponding export symbol also needs to be updated. const decl_exports = module.decl_exports.getValue(decl) orelse &[0]*ir.Module.Export{}; @@ -823,7 +832,7 @@ pub const ElfFile = struct { ) !void { try self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + exports.len); const typed_value = decl.typed_value.most_recent.typed_value; - assert(decl.link.local_sym_index != 0); + if (decl.link.local_sym_index == 0) return; const decl_sym = self.symbols.items[decl.link.local_sym_index]; for (exports) |exp| { @@ -1112,6 +1121,7 @@ pub fn createElfFile(allocator: *Allocator, file: fs.File, options: Options) !El else => return error.UnsupportedELFArchitecture, }, .shdr_table_dirty = true, + .owns_file_handle = false, }; errdefer self.deinit(); @@ -1161,6 +1171,7 @@ fn openBinFileInner(allocator: *Allocator, file: fs.File, options: Options) !Elf var self: ElfFile = .{ .allocator = allocator, .file = file, + .owns_file_handle = false, .options = options, .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { 32 => .p32, diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 283f0adc74..ba94726212 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -262,6 +262,50 @@ pub const Type = extern union { } } + pub fn hasCodeGenBits(self: Type) bool { + return switch (self.tag()) { + .u8, + .i8, + .isize, + .usize, + .c_short, + .c_ushort, + .c_int, + .c_uint, + .c_long, + .c_ulong, + .c_longlong, + .c_ulonglong, + .c_longdouble, + .f16, + .f32, + .f64, + .f128, + .bool, + .anyerror, + .fn_noreturn_no_args, + .fn_naked_noreturn_no_args, + .fn_ccc_void_no_args, + .single_const_pointer_to_comptime_int, + .const_slice_u8, // See last_no_payload_tag below. + .array_u8_sentinel_0, + .array, + .single_const_pointer, + .int_signed, + .int_unsigned, + => true, + + .c_void, + .void, + .type, + .comptime_int, + .comptime_float, + .noreturn, + .@"null", + => false, + }; + } + pub fn isSinglePointer(self: Type) bool { return switch (self.tag()) { .u8, diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 95d25770dd..00db2b5ccc 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -180,10 +180,17 @@ pub const Value = extern union { /// Asserts that the value is representable as an array of bytes. /// Copies the value into a freshly allocated slice of memory, which is owned by the caller. 
- pub fn toAllocatedBytes(self: Value, allocator: *Allocator) Allocator.Error![]u8 { + pub fn toAllocatedBytes(self: Value, allocator: *Allocator) ![]u8 { if (self.cast(Payload.Bytes)) |bytes| { return std.mem.dupe(allocator, u8, bytes.data); } + if (self.cast(Payload.Repeated)) |repeated| { + @panic("TODO implement toAllocatedBytes for this Value tag"); + } + if (self.cast(Payload.DeclRef)) |declref| { + const val = try declref.decl.value(); + return val.toAllocatedBytes(allocator); + } unreachable; } From 81a01bd4815779ebeb5898a825bf91628b75ff47 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 14 May 2020 16:34:04 -0400 Subject: [PATCH 11/31] fix codegen of sentinel-terminated arrays and .got alignment we now have an exit(0) program working --- lib/std/mem.zig | 6 +- src-self-hosted/codegen.zig | 54 ++++++++-- src-self-hosted/ir.zig | 13 ++- src-self-hosted/link.zig | 197 +++++++++++++++++++----------------- src-self-hosted/type.zig | 112 +++++++++++++++++++- 5 files changed, 275 insertions(+), 107 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 0b5a6adfd9..95d6b77e87 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -2099,7 +2099,11 @@ pub fn alignBackwardGeneric(comptime T: type, addr: T, alignment: T) T { /// Given an address and an alignment, return true if the address is a multiple of the alignment /// The alignment must be a power of 2 and greater than 0. pub fn isAligned(addr: usize, alignment: usize) bool { - return alignBackward(addr, alignment) == addr; + return isAlignedGeneric(u64, addr, alignment); +} + +pub fn isAlignedGeneric(comptime T: type, addr: T, alignment: T) bool { + return alignBackwardGeneric(T, addr, alignment) == addr; } test "isAligned" { diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 176bdc1128..b19a3f24f7 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -10,8 +10,10 @@ const Target = std.Target; const Allocator = mem.Allocator; pub const Result = union(enum) { - /// This value might or might not alias the `code` parameter passed to `generateSymbol`. - ok: []const u8, + /// The `code` parameter passed to `generateSymbol` has the value appended. + appended: void, + /// The value is available externally, `code` is unused. + externally_managed: []const u8, fail: *ir.ErrorMsg, }; @@ -20,7 +22,11 @@ pub fn generateSymbol( src: usize, typed_value: TypedValue, code: *std.ArrayList(u8), -) error{OutOfMemory}!Result { +) error{ + OutOfMemory, + /// A Decl that this symbol depends on had a semantic analysis failure. 
+ AnalysisFail, +}!Result { switch (typed_value.ty.zigTypeTag()) { .Fn => { const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; @@ -45,12 +51,29 @@ pub fn generateSymbol( if (function.err_msg) |em| { return Result{ .fail = em }; } else { - return Result{ .ok = code.items }; + return Result{ .appended = {} }; } }, .Array => { if (typed_value.val.cast(Value.Payload.Bytes)) |payload| { - return Result{ .ok = payload.data }; + if (typed_value.ty.arraySentinel()) |sentinel| { + try code.ensureCapacity(code.items.len + payload.data.len + 1); + code.appendSliceAssumeCapacity(payload.data); + const prev_len = code.items.len; + switch (try generateSymbol(bin_file, src, .{ + .ty = typed_value.ty.elemType(), + .val = sentinel, + }, code)) { + .appended => return Result{ .appended = {} }, + .externally_managed => |slice| { + code.appendSliceAssumeCapacity(slice); + return Result{ .appended = {} }; + }, + .fail => |em| return Result{ .fail = em }, + } + } else { + return Result{ .externally_managed = payload.data }; + } } return Result{ .fail = try ir.ErrorMsg.create( @@ -64,10 +87,11 @@ pub fn generateSymbol( .Pointer => { if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { const decl = payload.decl; + if (decl.analysis != .complete) return error.AnalysisFail; assert(decl.link.local_sym_index != 0); // TODO handle the dependency of this symbol on the decl's vaddr. // If the decl changes vaddr, then this symbol needs to get regenerated. - const vaddr = bin_file.symbols.items[decl.link.local_sym_index].st_value; + const vaddr = bin_file.local_symbols.items[decl.link.local_sym_index].st_value; const endian = bin_file.options.target.cpu.arch.endian(); switch (bin_file.ptr_width) { .p32 => { @@ -79,7 +103,7 @@ pub fn generateSymbol( mem.writeInt(u64, code.items[0..8], vaddr, endian); }, } - return Result{ .ok = code.items }; + return Result{ .appended = {} }; } return Result{ .fail = try ir.ErrorMsg.create( @@ -90,6 +114,22 @@ pub fn generateSymbol( ), }; }, + .Int => { + const info = typed_value.ty.intInfo(bin_file.options.target); + if (info.bits == 8 and !info.signed) { + const x = typed_value.val.toUnsignedInt(); + try code.append(@intCast(u8, x)); + return Result{ .appended = {} }; + } + return Result{ + .fail = try ir.ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for int type '{}'", + .{typed_value.ty}, + ), + }; + }, else => |t| { return Result{ .fail = try ir.ErrorMsg.create( diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 17a72c4ee0..b357a62932 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -334,8 +334,14 @@ pub const Module = struct { } pub fn dump(self: *Decl) void { - self.scope.dumpSrc(self.src); - std.debug.warn(" name={} status={}", .{ mem.spanZ(self.name), @tagName(self.analysis) }); + const loc = std.zig.findLineColumn(self.scope.source.bytes, self.src); + std.debug.warn("{}:{}:{} name={} status={}", .{ + self.scope.sub_file_path, + loc.line + 1, + loc.column + 1, + mem.spanZ(self.name), + @tagName(self.analysis), + }); if (self.typedValueManaged()) |tvm| { std.debug.warn(" ty={} val={}", .{ tvm.typed_value.ty, tvm.typed_value.val }); } @@ -721,6 +727,9 @@ pub const Module = struct { self.bin_file.updateDecl(self, decl) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => { + decl.analysis = .repeat_dependency_failure; + }, else => { try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); 
self.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create( diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index c0110d8ae0..fb3953fe4f 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -118,8 +118,12 @@ pub const ElfFile = struct { symtab_section_index: ?u16 = null, got_section_index: ?u16 = null, - /// The same order as in the file - symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = std.ArrayListUnmanaged(elf.Elf64_Sym){}, + /// The same order as in the file. ELF requires global symbols to all be after the + /// local symbols, they cannot be mixed. So we must buffer all the global symbols and + /// write them at the end. These are only the local symbols. The length of this array + /// is the value used for sh_info in the .symtab section. + local_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = std.ArrayListUnmanaged(elf.Elf64_Sym){}, + global_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = std.ArrayListUnmanaged(elf.Elf64_Sym){}, /// Same order as in the file. The value is the absolute vaddr value. /// If the vaddr of the executable program header changes, the entire @@ -130,7 +134,6 @@ pub const ElfFile = struct { shdr_table_dirty: bool = false, shstrtab_dirty: bool = false, offset_table_count_dirty: bool = false, - symbol_count_dirty: bool = false, error_flags: ErrorFlags = ErrorFlags{}, @@ -156,14 +159,15 @@ pub const ElfFile = struct { }; pub const Export = struct { - sym_index: ?usize = null, + sym_index: ?u32 = null, }; pub fn deinit(self: *ElfFile) void { self.sections.deinit(self.allocator); self.program_headers.deinit(self.allocator); self.shstrtab.deinit(self.allocator); - self.symbols.deinit(self.allocator); + self.local_symbols.deinit(self.allocator); + self.global_symbols.deinit(self.allocator); self.offset_table.deinit(self.allocator); if (self.owns_file_handle) self.file.close(); @@ -298,7 +302,10 @@ pub const ElfFile = struct { if (self.phdr_got_index == null) { self.phdr_got_index = @intCast(u16, self.program_headers.items.len); const file_size = @as(u64, ptr_size) * self.options.symbol_count_hint; - const off = self.findFreeSpace(file_size, ptr_size); + // We really only need ptr alignment but since we are using PROGBITS, linux requires + // page align. + const p_align = 0x1000; + const off = self.findFreeSpace(file_size, p_align); //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); // TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at. // we'll need to re-use that function anyway, in case the GOT grows and overlaps something @@ -311,7 +318,7 @@ pub const ElfFile = struct { .p_vaddr = default_got_addr, .p_paddr = default_got_addr, .p_memsz = file_size, - .p_align = ptr_size, + .p_align = p_align, .p_flags = elf.PF_R, }); self.phdr_table_dirty = true; @@ -369,7 +376,7 @@ pub const ElfFile = struct { .sh_link = 0, .sh_info = 0, .sh_addralign = phdr.p_align, - .sh_entsize = ptr_size, + .sh_entsize = 0, }); self.shdr_table_dirty = true; } @@ -390,12 +397,12 @@ pub const ElfFile = struct { .sh_size = file_size, // The section header index of the associated string table. 
.sh_link = self.shstrtab_index.?, - .sh_info = @intCast(u32, self.symbols.items.len), + .sh_info = @intCast(u32, self.local_symbols.items.len), .sh_addralign = min_align, .sh_entsize = each_size, }); self.shdr_table_dirty = true; - try self.writeAllSymbols(); + try self.writeSymbol(0); } const shsize: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Shdr), @@ -427,6 +434,10 @@ pub const ElfFile = struct { pub fn flush(self: *ElfFile) !void { const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + // Unfortunately these have to be buffered and done at the end because ELF does not allow + // mixing local and global symbols within a symbol table. + try self.writeAllGlobalSymbols(); + if (self.phdr_table_dirty) { const phsize: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Phdr), @@ -552,8 +563,9 @@ pub const ElfFile = struct { assert(!self.phdr_table_dirty); assert(!self.shdr_table_dirty); assert(!self.shstrtab_dirty); - assert(!self.symbol_count_dirty); assert(!self.offset_table_count_dirty); + const syms_sect = &self.sections.items[self.symtab_section_index.?]; + assert(syms_sect.sh_info == self.local_symbols.items.len); } fn writeElfHeader(self: *ElfFile) !void { @@ -683,7 +695,7 @@ pub const ElfFile = struct { size_capacity: u64, }; - fn allocateTextBlock(self: *ElfFile, new_block_size: u64) !AllocatedBlock { + fn allocateTextBlock(self: *ElfFile, new_block_size: u64, alignment: u64) !AllocatedBlock { const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; const shdr = &self.sections.items[self.text_section_index.?]; @@ -692,14 +704,15 @@ pub const ElfFile = struct { // TODO instead of looping here, maintain a free list and a pointer to the end. var last_start: u64 = phdr.p_vaddr; var last_size: u64 = 0; - for (self.symbols.items) |sym| { - if (sym.st_value > last_start) { + for (self.local_symbols.items) |sym| { + if (sym.st_value + sym.st_size > last_start + last_size) { last_start = sym.st_value; last_size = sym.st_size; } } const end_vaddr = last_start + (last_size * alloc_num / alloc_den); - const needed_size = (end_vaddr + new_block_size) - phdr.p_vaddr; + const aligned_start_vaddr = mem.alignForwardGeneric(u64, end_vaddr, alignment); + const needed_size = (aligned_start_vaddr + new_block_size) - phdr.p_vaddr; if (needed_size > text_capacity) { // Must move the entire text section. const new_offset = self.findFreeSpace(needed_size, 0x1000); @@ -717,9 +730,9 @@ pub const ElfFile = struct { self.shdr_table_dirty = true; // TODO look into making only the one section dirty return AllocatedBlock{ - .vaddr = end_vaddr, - .file_offset = shdr.sh_offset + (end_vaddr - phdr.p_vaddr), - .size_capacity = text_capacity - end_vaddr, + .vaddr = aligned_start_vaddr, + .file_offset = shdr.sh_offset + (aligned_start_vaddr - phdr.p_vaddr), + .size_capacity = text_capacity - needed_size, }; } @@ -731,7 +744,7 @@ pub const ElfFile = struct { // TODO look into using a hash map to speed up perf. 
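// A sketch of the hash-map speedup the TODO above suggests (hypothetical
// field, not part of this patch):
//   blocks_by_vaddr: std.AutoHashMap(u64, AllocatedBlock),
// which would turn the linear scan below into a single
// `blocks_by_vaddr.get(sym.st_value)` lookup.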
const text_capacity = self.allocatedSize(shdr.sh_offset); var next_vaddr_start = phdr.p_vaddr + text_capacity; - for (self.symbols.items) |elem| { + for (self.local_symbols.items) |elem| { if (elem.st_value < sym.st_value) continue; if (elem.st_value < next_vaddr_start) next_vaddr_start = elem.st_value; } @@ -748,7 +761,8 @@ pub const ElfFile = struct { const typed_value = decl.typed_value.most_recent.typed_value; const code = switch (try codegen.generateSymbol(self, decl.src, typed_value, &code_buffer)) { - .ok => |x| x, + .externally_managed => |x| x, + .appended => code_buffer.items, .fail => |em| { decl.analysis = .codegen_failure; _ = try module.failed_decls.put(decl, em); @@ -756,20 +770,23 @@ pub const ElfFile = struct { }, }; + const required_alignment = typed_value.ty.abiAlignment(self.options.target); + const file_offset = blk: { - const code_size = code.len; const stt_bits: u8 = switch (typed_value.ty.zigTypeTag()) { .Fn => elf.STT_FUNC, else => elf.STT_OBJECT, }; if (decl.link.local_sym_index != 0) { - const local_sym = &self.symbols.items[decl.link.local_sym_index]; + const local_sym = &self.local_symbols.items[decl.link.local_sym_index]; const existing_block = self.findAllocatedTextBlock(local_sym.*); - const file_offset = if (code_size > existing_block.size_capacity) fo: { - const new_block = try self.allocateTextBlock(code_size); + const need_realloc = code.len > existing_block.size_capacity or + !mem.isAlignedGeneric(u64, local_sym.st_value, required_alignment); + const file_offset = if (need_realloc) fo: { + const new_block = try self.allocateTextBlock(code.len, required_alignment); local_sym.st_value = new_block.vaddr; - local_sym.st_size = code_size; + local_sym.st_size = code.len; try self.writeOffsetTableEntry(decl.link.offset_table_index); @@ -781,27 +798,27 @@ pub const ElfFile = struct { try self.writeSymbol(decl.link.local_sym_index); break :blk file_offset; } else { - try self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + 1); + try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); const decl_name = mem.spanZ(decl.name); const name_str_index = try self.makeString(decl_name); - const new_block = try self.allocateTextBlock(code_size); - const local_sym_index = self.symbols.items.len; + const new_block = try self.allocateTextBlock(code.len, required_alignment); + const local_sym_index = self.local_symbols.items.len; const offset_table_index = self.offset_table.items.len; - self.symbols.appendAssumeCapacity(.{ + //std.debug.warn("add symbol for {} at vaddr 0x{x}, size {}\n", .{ decl.name, new_block.vaddr, code.len }); + self.local_symbols.appendAssumeCapacity(.{ .st_name = name_str_index, .st_info = (elf.STB_LOCAL << 4) | stt_bits, .st_other = 0, .st_shndx = self.text_section_index.?, .st_value = new_block.vaddr, - .st_size = code_size, + .st_size = code.len, }); - errdefer self.symbols.shrink(self.allocator, self.symbols.items.len - 1); + errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1); self.offset_table.appendAssumeCapacity(new_block.vaddr); errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); - self.symbol_count_dirty = true; self.offset_table_count_dirty = true; try self.writeSymbol(local_sym_index); @@ -830,10 +847,10 @@ pub const ElfFile = struct { decl: *const ir.Module.Decl, exports: []const *ir.Module.Export, ) !void { - try 
self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + exports.len); + try self.global_symbols.ensureCapacity(self.allocator, self.global_symbols.items.len + exports.len); const typed_value = decl.typed_value.most_recent.typed_value; if (decl.link.local_sym_index == 0) return; - const decl_sym = self.symbols.items[decl.link.local_sym_index]; + const decl_sym = self.local_symbols.items[decl.link.local_sym_index]; for (exports) |exp| { if (exp.options.section) |section_name| { @@ -866,7 +883,7 @@ pub const ElfFile = struct { }; const stt_bits: u8 = @truncate(u4, decl_sym.st_info); if (exp.link.sym_index) |i| { - const sym = &self.symbols.items[i]; + const sym = &self.global_symbols.items[i]; sym.* = .{ .st_name = try self.updateString(sym.st_name, exp.options.name), .st_info = (stb_bits << 4) | stt_bits, @@ -875,11 +892,10 @@ pub const ElfFile = struct { .st_value = decl_sym.st_value, .st_size = decl_sym.st_size, }; - try self.writeSymbol(i); } else { const name = try self.makeString(exp.options.name); - const i = self.symbols.items.len; - self.symbols.appendAssumeCapacity(.{ + const i = self.global_symbols.items.len; + self.global_symbols.appendAssumeCapacity(.{ .st_name = name, .st_info = (stb_bits << 4) | stt_bits, .st_other = 0, @@ -887,11 +903,9 @@ pub const ElfFile = struct { .st_value = decl_sym.st_value, .st_size = decl_sym.st_size, }); - errdefer self.symbols.shrink(self.allocator, self.symbols.items.len - 1); - try self.writeSymbol(i); + errdefer self.global_symbols.shrink(self.allocator, self.global_symbols.items.len - 1); - self.symbol_count_dirty = true; - exp.link.sym_index = i; + exp.link.sym_index = @intCast(u32, i); } } } @@ -944,13 +958,17 @@ pub const ElfFile = struct { fn writeOffsetTableEntry(self: *ElfFile, index: usize) !void { const shdr = &self.sections.items[self.got_section_index.?]; const phdr = &self.program_headers.items[self.phdr_got_index.?]; + const entry_size: u16 = switch (self.ptr_width) { + .p32 => 4, + .p64 => 8, + }; if (self.offset_table_count_dirty) { // TODO Also detect virtual address collisions. const allocated_size = self.allocatedSize(shdr.sh_offset); - const needed_size = self.symbols.items.len * shdr.sh_entsize; + const needed_size = self.local_symbols.items.len * entry_size; if (needed_size > allocated_size) { // Must move the entire got section. - const new_offset = self.findFreeSpace(needed_size, @intCast(u16, shdr.sh_entsize)); + const new_offset = self.findFreeSpace(needed_size, entry_size); const amt = try self.file.copyRangeAll(shdr.sh_offset, self.file, new_offset, shdr.sh_size); if (amt != shdr.sh_size) return error.InputOutput; shdr.sh_offset = new_offset; @@ -965,7 +983,7 @@ pub const ElfFile = struct { self.offset_table_count_dirty = false; } const endian = self.options.target.cpu.arch.endian(); - const off = shdr.sh_offset + shdr.sh_entsize * index; + const off = shdr.sh_offset + @as(u64, entry_size) * index; switch (self.ptr_width) { .p32 => { var buf: [4]u8 = undefined; @@ -981,35 +999,42 @@ pub const ElfFile = struct { } fn writeSymbol(self: *ElfFile, index: usize) !void { - assert(index != 0); const syms_sect = &self.sections.items[self.symtab_section_index.?]; // Make sure we are not pointlessly writing symbol data that will have to get relocated // due to running out of space. 
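// Layout note for the logic below: ELF requires all STB_LOCAL symbols to
// precede STB_GLOBAL symbols in .symtab, and sh_info holds the index of
// the first global symbol, i.e.:
//   const first_global_index = syms_sect.sh_info; // == local_symbols.items.len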
- if (self.symbol_count_dirty) { + if (self.local_symbols.items.len != syms_sect.sh_info) { const sym_size: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Sym), .p64 => @sizeOf(elf.Elf64_Sym), }; - const allocated_size = self.allocatedSize(syms_sect.sh_offset); - const needed_size = self.symbols.items.len * sym_size; - if (needed_size > allocated_size) { - return self.writeAllSymbols(); + const sym_align: u16 = switch (self.ptr_width) { + .p32 => @alignOf(elf.Elf32_Sym), + .p64 => @alignOf(elf.Elf64_Sym), + }; + const needed_size = (self.local_symbols.items.len + self.global_symbols.items.len) * sym_size; + if (needed_size > self.allocatedSize(syms_sect.sh_offset)) { + // Move all the symbols to a new file location. + const new_offset = self.findFreeSpace(needed_size, sym_align); + const existing_size = @as(u64, syms_sect.sh_info) * sym_size; + const amt = try self.file.copyRangeAll(syms_sect.sh_offset, self.file, new_offset, existing_size); + if (amt != existing_size) return error.InputOutput; + syms_sect.sh_offset = new_offset; } - syms_sect.sh_info = @intCast(u32, self.symbols.items.len); + syms_sect.sh_info = @intCast(u32, self.local_symbols.items.len); + syms_sect.sh_size = needed_size; // anticipating adding the global symbols later self.shdr_table_dirty = true; // TODO look into only writing one section - self.symbol_count_dirty = false; } const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); switch (self.ptr_width) { .p32 => { var sym = [1]elf.Elf32_Sym{ .{ - .st_name = self.symbols.items[index].st_name, - .st_value = @intCast(u32, self.symbols.items[index].st_value), - .st_size = @intCast(u32, self.symbols.items[index].st_size), - .st_info = self.symbols.items[index].st_info, - .st_other = self.symbols.items[index].st_other, - .st_shndx = self.symbols.items[index].st_shndx, + .st_name = self.local_symbols.items[index].st_name, + .st_value = @intCast(u32, self.local_symbols.items[index].st_value), + .st_size = @intCast(u32, self.local_symbols.items[index].st_size), + .st_info = self.local_symbols.items[index].st_info, + .st_other = self.local_symbols.items[index].st_other, + .st_shndx = self.local_symbols.items[index].st_shndx, }, }; if (foreign_endian) { @@ -1019,7 +1044,7 @@ pub const ElfFile = struct { try self.file.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); }, .p64 => { - var sym = [1]elf.Elf64_Sym{self.symbols.items[index]}; + var sym = [1]elf.Elf64_Sym{self.local_symbols.items[index]}; if (foreign_endian) { bswapAllFields(elf.Elf64_Sym, &sym[0]); } @@ -1029,67 +1054,53 @@ pub const ElfFile = struct { } } - fn writeAllSymbols(self: *ElfFile) !void { + fn writeAllGlobalSymbols(self: *ElfFile) !void { const syms_sect = &self.sections.items[self.symtab_section_index.?]; - const sym_align: u16 = switch (self.ptr_width) { - .p32 => @alignOf(elf.Elf32_Sym), - .p64 => @alignOf(elf.Elf64_Sym), - }; const sym_size: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Sym), .p64 => @sizeOf(elf.Elf64_Sym), }; - const allocated_size = self.allocatedSize(syms_sect.sh_offset); - const needed_size = self.symbols.items.len * sym_size; - if (needed_size > allocated_size) { - syms_sect.sh_size = 0; // free the space - syms_sect.sh_offset = self.findFreeSpace(needed_size, sym_align); - //std.debug.warn("moved symtab to 0x{x} to 0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size }); - } //std.debug.warn("symtab start=0x{x} end=0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size }); - 
syms_sect.sh_size = needed_size; - syms_sect.sh_info = @intCast(u32, self.symbols.items.len); - self.symbol_count_dirty = false; - self.shdr_table_dirty = true; // TODO look into only writing one section const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const global_syms_off = syms_sect.sh_offset + self.local_symbols.items.len * sym_size; switch (self.ptr_width) { .p32 => { - const buf = try self.allocator.alloc(elf.Elf32_Sym, self.symbols.items.len); + const buf = try self.allocator.alloc(elf.Elf32_Sym, self.global_symbols.items.len); defer self.allocator.free(buf); for (buf) |*sym, i| { sym.* = .{ - .st_name = self.symbols.items[i].st_name, - .st_value = @intCast(u32, self.symbols.items[i].st_value), - .st_size = @intCast(u32, self.symbols.items[i].st_size), - .st_info = self.symbols.items[i].st_info, - .st_other = self.symbols.items[i].st_other, - .st_shndx = self.symbols.items[i].st_shndx, + .st_name = self.global_symbols.items[i].st_name, + .st_value = @intCast(u32, self.global_symbols.items[i].st_value), + .st_size = @intCast(u32, self.global_symbols.items[i].st_size), + .st_info = self.global_symbols.items[i].st_info, + .st_other = self.global_symbols.items[i].st_other, + .st_shndx = self.global_symbols.items[i].st_shndx, }; if (foreign_endian) { bswapAllFields(elf.Elf32_Sym, sym); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + try self.file.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); }, .p64 => { - const buf = try self.allocator.alloc(elf.Elf64_Sym, self.symbols.items.len); + const buf = try self.allocator.alloc(elf.Elf64_Sym, self.global_symbols.items.len); defer self.allocator.free(buf); for (buf) |*sym, i| { sym.* = .{ - .st_name = self.symbols.items[i].st_name, - .st_value = self.symbols.items[i].st_value, - .st_size = self.symbols.items[i].st_size, - .st_info = self.symbols.items[i].st_info, - .st_other = self.symbols.items[i].st_other, - .st_shndx = self.symbols.items[i].st_shndx, + .st_name = self.global_symbols.items[i].st_name, + .st_value = self.global_symbols.items[i].st_value, + .st_size = self.global_symbols.items[i].st_size, + .st_info = self.global_symbols.items[i].st_info, + .st_other = self.global_symbols.items[i].st_other, + .st_shndx = self.global_symbols.items[i].st_shndx, }; if (foreign_endian) { bswapAllFields(elf.Elf64_Sym, sym); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + try self.file.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); }, } } @@ -1126,7 +1137,7 @@ pub fn createElfFile(allocator: *Allocator, file: fs.File, options: Options) !El errdefer self.deinit(); // Index 0 is always a null symbol. - try self.symbols.append(allocator, .{ + try self.local_symbols.append(allocator, .{ .st_name = 0, .st_info = 0, .st_other = 0, diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index ba94726212..84f1ed852d 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -287,12 +287,12 @@ pub const Type = extern union { .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .single_const_pointer_to_comptime_int, - .const_slice_u8, // See last_no_payload_tag below. + .const_slice_u8, .array_u8_sentinel_0, - .array, + .array, // TODO check for zero bits .single_const_pointer, - .int_signed, - .int_unsigned, + .int_signed, // TODO check for zero bits + .int_unsigned, // TODO check for zero bits => true, .c_void, @@ -306,6 +306,66 @@ pub const Type = extern union { }; } + /// Asserts that hasCodeGenBits() is true. 
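/// For integer types this works out to the byte size rounded up to the next
/// power of two, e.g. u17 occupies 3 bytes and gets alignment 4, while u65
/// occupies 9 bytes and gets alignment 16 (see the int_signed/int_unsigned
/// arm below):
///     std.math.ceilPowerOfTwoPromote(u16, (17 + 7) / 8) == 4
///     std.math.ceilPowerOfTwoPromote(u16, (65 + 7) / 8) == 16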
+ pub fn abiAlignment(self: Type, target: Target) u32 { + return switch (self.tag()) { + .u8, + .i8, + .bool, + .fn_noreturn_no_args, // represents machine code; not a pointer + .fn_naked_noreturn_no_args, // represents machine code; not a pointer + .fn_ccc_void_no_args, // represents machine code; not a pointer + .array_u8_sentinel_0, + => return 1, + + .isize, + .usize, + .single_const_pointer_to_comptime_int, + .const_slice_u8, + .single_const_pointer, + => return @divExact(target.cpu.arch.ptrBitWidth(), 8), + + .c_short => return @divExact(CType.short.sizeInBits(target), 8), + .c_ushort => return @divExact(CType.ushort.sizeInBits(target), 8), + .c_int => return @divExact(CType.int.sizeInBits(target), 8), + .c_uint => return @divExact(CType.uint.sizeInBits(target), 8), + .c_long => return @divExact(CType.long.sizeInBits(target), 8), + .c_ulong => return @divExact(CType.ulong.sizeInBits(target), 8), + .c_longlong => return @divExact(CType.longlong.sizeInBits(target), 8), + .c_ulonglong => return @divExact(CType.ulonglong.sizeInBits(target), 8), + + .f16 => return 2, + .f32 => return 4, + .f64 => return 8, + .f128 => return 16, + .c_longdouble => return 16, + + .anyerror => return 2, // TODO revisit this when we have the concept of the error tag type + + .array => return self.cast(Payload.Array).?.elem_type.abiAlignment(target), + + .int_signed, .int_unsigned => { + const bits: u16 = if (self.cast(Payload.IntSigned)) |pl| + pl.bits + else if (self.cast(Payload.IntUnsigned)) |pl| + pl.bits + else + unreachable; + + return std.math.ceilPowerOfTwoPromote(u16, (bits + 7) / 8); + }, + + .c_void, + .void, + .type, + .comptime_int, + .comptime_float, + .noreturn, + .@"null", + => unreachable, + }; + } + pub fn isSinglePointer(self: Type) bool { return switch (self.tag()) { .u8, @@ -525,6 +585,50 @@ pub const Type = extern union { }; } + /// Asserts the type is an array or vector. + pub fn arraySentinel(self: Type) ?Value { + return switch (self.tag()) { + .u8, + .i8, + .isize, + .usize, + .c_short, + .c_ushort, + .c_int, + .c_uint, + .c_long, + .c_ulong, + .c_longlong, + .c_ulonglong, + .c_longdouble, + .f16, + .f32, + .f64, + .f128, + .c_void, + .bool, + .void, + .type, + .anyerror, + .comptime_int, + .comptime_float, + .noreturn, + .@"null", + .fn_noreturn_no_args, + .fn_naked_noreturn_no_args, + .fn_ccc_void_no_args, + .single_const_pointer, + .single_const_pointer_to_comptime_int, + .const_slice_u8, + .int_unsigned, + .int_signed, + => unreachable, + + .array => return null, + .array_u8_sentinel_0 => return Value.initTag(.zero), + }; + } + /// Returns true if and only if the type is a fixed-width, signed integer. 
pub fn isSignedInt(self: Type) bool { return switch (self.tag()) { From ebb81ebe59d56a2ccb104e100b9c96df82eedc97 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 15 May 2020 01:22:04 -0400 Subject: [PATCH 12/31] fix the global offset table code and decl export updating --- src-self-hosted/codegen.zig | 78 +++++++++++----- src-self-hosted/ir.zig | 172 ++++++++++++++++++++++++++++-------- src-self-hosted/ir/text.zig | 12 ++- src-self-hosted/link.zig | 48 +++++++++- 4 files changed, 249 insertions(+), 61 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index b19a3f24f7..6ebf68df90 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -33,6 +33,7 @@ pub fn generateSymbol( var function = Function{ .target = &bin_file.options.target, + .bin_file = bin_file, .mod_fn = module_fn, .code = code, .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator), @@ -144,6 +145,7 @@ pub fn generateSymbol( } const Function = struct { + bin_file: *link.ElfFile, target: *const std.Target, mod_fn: *const ir.Module.Fn, code: *std.ArrayList(u8), @@ -160,6 +162,8 @@ const Function = struct { /// The value is in a target-specific register. The value can /// be @intToEnum casted to the respective Reg enum. register: usize, + /// The value is in memory at a hard-coded address. + memory: u64, }; fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue { @@ -375,6 +379,7 @@ const Function = struct { }, .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rax = embedded_in_code", .{}), .register => return self.fail(src, "TODO implement x86_64 genSetReg %rax = register", .{}), + .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rax = memory", .{}), }, .rdx => switch (mcv) { .none, .unreach => unreachable, @@ -406,6 +411,7 @@ const Function = struct { }, .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}), .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}), + .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = memory", .{}), }, .rdi => switch (mcv) { .none, .unreach => unreachable, @@ -437,10 +443,37 @@ const Function = struct { }, .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = embedded_in_code", .{}), .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = register", .{}), + .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = memory", .{}), }, .rsi => switch (mcv) { .none, .unreach => unreachable, - .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = immediate", .{}), + .immediate => |x| { + // Setting the esi register zeroes the upper part of rsi, so if the number is small + // enough, that is preferable.
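// (Encoding sizes for the three strategies below: 2 bytes for the xor, 5 for
// the mov imm32, and 10 for the REX.W movabs imm64. Unlike the mov forms, the
// xor also clobbers FLAGS, which matters if condition codes are ever live
// across genSetReg.)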
+ // Best case: zero + // 31 f6 xor esi,esi + if (x == 0) { + return self.code.appendSlice(&[_]u8{ 0x31, 0xf6 }); + } + // Next best case: set esi with 4 bytes + // be 40 30 20 10 mov esi,0x10203040 + if (x <= std.math.maxInt(u32)) { + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xbe; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } + // Worst case: set rsi with 8 bytes + // 48 be 80 70 60 50 40 30 20 10 movabs rsi,0x1020304050607080 + + try self.code.resize(self.code.items.len + 10); + self.code.items[self.code.items.len - 10] = 0x48; + self.code.items[self.code.items.len - 9] = 0xbe; + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + return; + }, .embedded_in_code => |code_offset| { // Examples: // lea rsi, [rip + 0x01020304] @@ -462,6 +495,21 @@ const Function = struct { return; }, .register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}), + .memory => |x| { + if (x <= std.math.maxInt(u32)) { + // 48 8b 34 25 40 30 20 10 mov rsi,QWORD PTR ds:0x10203040 + try self.code.resize(self.code.items.len + 8); + self.code.items[self.code.items.len - 8] = 0x48; + self.code.items[self.code.items.len - 7] = 0x8b; + self.code.items[self.code.items.len - 6] = 0x34; + self.code.items[self.code.items.len - 5] = 0x25; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } else { + return self.fail(src, "TODO implement genSetReg for x86_64 setting rsi to 64-bit memory", .{}); + } + }, }, else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}), }, @@ -493,33 +541,21 @@ const Function = struct { } fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue { + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); const allocator = self.code.allocator; switch (typed_value.ty.zigTypeTag()) { .Pointer => { - const ptr_elem_type = typed_value.ty.elemType(); - switch (ptr_elem_type.zigTypeTag()) { - .Array => { - // TODO more checks to make sure this can be emitted as a string literal - const bytes = typed_value.val.toAllocatedBytes(allocator) catch |err| switch (err) { - error.AnalysisFail => unreachable, - else => |e| return e, - }; - defer allocator.free(bytes); - const smaller_len = std.math.cast(u32, bytes.len) catch - return self.fail(src, "TODO handle a larger string constant", .{}); - - // Emit the string literal directly into the code; jump over it. 
- try self.genRelativeFwdJump(src, smaller_len); - const offset = self.code.items.len; - try self.code.appendSlice(bytes); - return MCValue{ .embedded_in_code = offset }; - }, - else => |t| return self.fail(src, "TODO implement emitTypedValue for pointer to '{}'", .{@tagName(t)}), + if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { + const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; + const decl = payload.decl; + const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes; + return MCValue{ .memory = got_addr }; } + return self.fail(src, "TODO codegen more kinds of const pointers", .{}); }, .Int => { const info = typed_value.ty.intInfo(self.target.*); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); if (info.bits > ptr_bits or info.signed) { return self.fail(src, "TODO const int bigger than ptr and signed int", .{}); } diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index b357a62932..92a5aa7fdf 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -292,6 +292,8 @@ pub const Module = struct { /// TODO look into using a lightweight map/set data structure rather than a linear array. dependants: ArrayListUnmanaged(*Decl) = ArrayListUnmanaged(*Decl){}, + contents_hash: Hash, + pub fn destroy(self: *Decl, allocator: *Allocator) void { allocator.free(mem.spanZ(self.name)); if (self.typedValueManaged()) |tvm| { @@ -465,26 +467,42 @@ pub const Module = struct { module: *text.Module, }, status: enum { - unloaded, + never_loaded, + unloaded_success, unloaded_parse_failure, + unloaded_sema_failure, loaded_parse_failure, loaded_sema_failure, loaded_success, }, - pub fn deinit(self: *ZIRModule, allocator: *Allocator) void { + pub fn unload(self: *ZIRModule, allocator: *Allocator) void { switch (self.status) { - .unloaded, + .never_loaded, .unloaded_parse_failure, + .unloaded_sema_failure, + .unloaded_success, => {}, - .loaded_success, .loaded_sema_failure => { + + .loaded_success => { allocator.free(self.source.bytes); self.contents.module.deinit(allocator); + self.status = .unloaded_success; + }, + .loaded_sema_failure => { + allocator.free(self.source.bytes); + self.contents.module.deinit(allocator); + self.status = .unloaded_sema_failure; }, .loaded_parse_failure => { allocator.free(self.source.bytes); + self.status = .unloaded_parse_failure; }, } + } + + pub fn deinit(self: *ZIRModule, allocator: *Allocator) void { + self.unload(allocator); self.* = undefined; } @@ -623,7 +641,8 @@ pub const Module = struct { try self.performAllTheWork(); - // TODO unload all the source files from memory + // Unload all the source files from memory. 
+ self.root_scope.unload(self.allocator); try self.bin_file.flush(); self.link_error_flags = self.bin_file.error_flags; @@ -722,8 +741,8 @@ pub const Module = struct { .success => {}, } } - if (!decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()) - continue; + + assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()); self.bin_file.updateDecl(self, decl) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, @@ -748,7 +767,7 @@ pub const Module = struct { fn getTextModule(self: *Module, root_scope: *Scope.ZIRModule) !*text.Module { switch (root_scope.status) { - .unloaded => { + .never_loaded, .unloaded_success => { try self.failed_files.ensureCapacity(self.failed_files.size + 1); var keep_source = false; @@ -789,6 +808,7 @@ pub const Module = struct { }, .unloaded_parse_failure, + .unloaded_sema_failure, .loaded_parse_failure, .loaded_sema_failure, => return error.AnalysisFail, @@ -804,16 +824,62 @@ pub const Module = struct { // Here we simulate adding a source file which was previously not part of the compilation, // which means scanning the decls looking for exports. // TODO also identify decls that need to be deleted. - const src_module = try self.getTextModule(root_scope); + switch (root_scope.status) { + .never_loaded => { + const src_module = try self.getTextModule(root_scope); - // Here we ensure enough queue capacity to store all the decls, so that later we can use - // appendAssumeCapacity. - try self.work_queue.ensureUnusedCapacity(src_module.decls.len); + // Here we ensure enough queue capacity to store all the decls, so that later we can use + // appendAssumeCapacity. + try self.work_queue.ensureUnusedCapacity(src_module.decls.len); - for (src_module.decls) |decl| { - if (decl.cast(text.Inst.Export)) |export_inst| { - _ = try self.resolveDecl(&root_scope.base, &export_inst.base); - } + for (src_module.decls) |decl| { + if (decl.cast(text.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, + + .unloaded_parse_failure, + .unloaded_sema_failure, + .loaded_parse_failure, + .loaded_sema_failure, + .loaded_success, + .unloaded_success, + => { + const src_module = try self.getTextModule(root_scope); + + // Look for changed decls. 
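// Change detection is purely textual: each Decl remembers a hash of its
// source span (contents_hash), and a re-parsed decl counts as changed when
// the hash of its new contents differs. In sketch form, using the same calls
// as the loop below:
//     const new_contents_hash = Decl.hashSimpleName(src_decl.contents);
//     const changed = !mem.eql(u8, &new_contents_hash, &decl.contents_hash);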
+ for (src_module.decls) |src_decl| { + const name_hash = Decl.hashSimpleName(src_decl.name); + if (self.decl_table.get(name_hash)) |kv| { + const decl = kv.value; + const new_contents_hash = Decl.hashSimpleName(src_decl.contents); + if (!mem.eql(u8, &new_contents_hash, &decl.contents_hash)) { + // TODO recursive dependency management + std.debug.warn("noticed that '{}' changed\n", .{src_decl.name}); + self.decl_table.removeAssertDiscard(name_hash); + const saved_link = decl.link; + decl.destroy(self.allocator); + if (self.export_owners.getValue(decl)) |exports| { + @panic("TODO handle updating a decl that does an export"); + } + const new_decl = self.resolveDecl( + &root_scope.base, + src_decl, + saved_link, + ) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => continue, + }; + if (self.decl_exports.remove(decl)) |entry| { + self.decl_exports.putAssumeCapacityNoClobber(new_decl, entry.value); + } + } + } else if (src_decl.cast(text.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, } } @@ -846,11 +912,17 @@ pub const Module = struct { }; } - fn resolveDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl { + fn resolveDecl( + self: *Module, + scope: *Scope, + old_inst: *text.Inst, + bin_file_link: link.ElfFile.Decl, + ) InnerError!*Decl { const hash = Decl.hashSimpleName(old_inst.name); if (self.decl_table.get(hash)) |kv| { return kv.value; } else { + std.debug.warn("creating new decl for {}\n", .{old_inst.name}); const new_decl = blk: { try self.decl_table.ensureCapacity(self.decl_table.size + 1); const new_decl = try self.allocator.create(Decl); @@ -863,6 +935,8 @@ pub const Module = struct { .src = old_inst.src, .typed_value = .{ .never_succeeded = {} }, .analysis = .initial_in_progress, + .contents_hash = Decl.hashSimpleName(old_inst.contents), + .link = bin_file_link, }; self.decl_table.putAssumeCapacityNoClobber(hash, new_decl); break :blk new_decl; @@ -887,6 +961,14 @@ pub const Module = struct { }; const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); + const has_codegen_bits = typed_value.ty.hasCodeGenBits(); + if (has_codegen_bits) { + // We don't fully codegen the decl until later, but we do need to reserve a global + // offset table index for it. This allows us to codegen decls out of dependency order, + // increasing how many computations can be done in parallel. + try self.bin_file.allocateDeclIndexes(new_decl); + } + arena_state.* = decl_scope.arena.state; new_decl.typed_value = .{ @@ -896,14 +978,16 @@ pub const Module = struct { }, }; new_decl.analysis = .complete; - // We ensureCapacity when scanning for decls. - self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); + if (has_codegen_bits) { + // We ensureCapacity when scanning for decls. 
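// (The never_loaded path in analyzeRoot reserves one work-queue slot per
// top-level decl up front; that reservation is what makes this
// assume-capacity append safe.)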
+ self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); + } return new_decl; } } fn resolveCompleteDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl { - const decl = try self.resolveDecl(scope, old_inst); + const decl = try self.resolveDecl(scope, old_inst, link.ElfFile.Decl.empty); switch (decl.analysis) { .initial_in_progress => unreachable, .repeat_in_progress => unreachable, @@ -2088,8 +2172,8 @@ pub fn main() anyerror!void { const src_path = args[1]; const bin_path = args[2]; - const debug_error_trace = true; - const output_zir = true; + const debug_error_trace = false; + const output_zir = false; const object_format: ?std.builtin.ObjectFormat = null; const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); @@ -2112,7 +2196,7 @@ pub fn main() anyerror!void { .sub_file_path = root_pkg.root_src_path, .source = .{ .unloaded = {} }, .contents = .{ .not_available = {} }, - .status = .unloaded, + .status = .never_loaded, }; break :blk Module{ @@ -2132,22 +2216,38 @@ pub fn main() anyerror!void { }; defer module.deinit(); - try module.update(); + const stdin = std.io.getStdIn().inStream(); + const stderr = std.io.getStdErr().outStream(); + var repl_buf: [1024]u8 = undefined; - var errors = try module.getAllErrorsAlloc(); - defer errors.deinit(allocator); + while (true) { + try module.update(); - if (errors.list.len != 0) { - for (errors.list) |full_err_msg| { - std.debug.warn("{}:{}:{}: error: {}\n", .{ - full_err_msg.src_path, - full_err_msg.line + 1, - full_err_msg.column + 1, - full_err_msg.msg, - }); + var errors = try module.getAllErrorsAlloc(); + defer errors.deinit(allocator); + + if (errors.list.len != 0) { + for (errors.list) |full_err_msg| { + std.debug.warn("{}:{}:{}: error: {}\n", .{ + full_err_msg.src_path, + full_err_msg.line + 1, + full_err_msg.column + 1, + full_err_msg.msg, + }); + } + if (debug_error_trace) return error.AnalysisFail; + } + + try stderr.print("🦎 ", .{}); + if (try stdin.readUntilDelimiterOrEof(&repl_buf, '\n')) |line| { + if (mem.eql(u8, line, "update")) { + continue; + } else { + try stderr.print("unknown command: {}\n", .{line}); + } + } else { + break; } - if (debug_error_trace) return error.AnalysisFail; - std.process.exit(1); } if (output_zir) { diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index 8f189f49e3..f283fb5410 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -19,6 +19,9 @@ pub const Inst = struct { src: usize, name: []const u8, + /// Slice into the source of the part after the = and before the next instruction. + contents: []const u8, + /// These names are used directly as the instruction names in the text format. 
pub const Tag = enum { breakpoint, @@ -798,11 +801,12 @@ const Parser = struct { } fn parseInstruction(self: *Parser, body_ctx: ?*Body, name: []const u8) InnerError!*Inst { + const contents_start = self.i; const fn_name = try skipToAndOver(self, '('); inline for (@typeInfo(Inst.Tag).Enum.fields) |field| { if (mem.eql(u8, field.name, fn_name)) { const tag = @field(Inst.Tag, field.name); - return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name); + return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name, contents_start); } } return self.fail("unknown instruction '{}'", .{fn_name}); @@ -814,12 +818,14 @@ const Parser = struct { comptime InstType: type, body_ctx: ?*Body, inst_name: []const u8, + contents_start: usize, ) InnerError!*Inst { const inst_specific = try self.arena.allocator.create(InstType); inst_specific.base = .{ .name = inst_name, .src = self.i, .tag = InstType.base_tag, + .contents = undefined, }; if (@hasField(InstType, "ty")) { @@ -867,6 +873,8 @@ const Parser = struct { } try requireEatBytes(self, ")"); + inst_specific.base.contents = self.source[contents_start..self.i]; + return &inst_specific.base; } @@ -952,6 +960,7 @@ const Parser = struct { .name = try self.generateName(), .src = src, .tag = Inst.Str.base_tag, + .contents = undefined, }, .positionals = .{ .bytes = ident }, .kw_args = .{}, @@ -962,6 +971,7 @@ const Parser = struct { .name = try self.generateName(), .src = src, .tag = Inst.DeclRef.base_tag, + .contents = undefined, }, .positionals = .{ .name = &name.base }, .kw_args = .{}, diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index fb3953fe4f..f7237f4d60 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -310,7 +310,7 @@ pub const ElfFile = struct { // TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at. // we'll need to re-use that function anyway, in case the GOT grows and overlaps something // else in virtual memory. 
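// A decl's GOT slot sits at a fixed offset from this segment's base, so its
// address can be computed before any machine code exists for the decl
// (allocateDeclIndexes reserves the slot, updateDecl fills it in later). In
// sketch form, mirroring the math in codegen's genTypedValue:
//     const ptr_bytes = @divExact(target.cpu.arch.ptrBitWidth(), 8);
//     const got_addr = got_phdr.p_vaddr + decl.link.offset_table_index * ptr_bytes;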
- const default_got_addr = 0x80000000; + const default_got_addr = 0x4000000; try self.program_headers.append(self.allocator, .{ .p_type = elf.PT_LOAD, .p_offset = off, @@ -755,6 +755,35 @@ pub const ElfFile = struct { }; } + pub fn allocateDeclIndexes(self: *ElfFile, decl: *ir.Module.Decl) !void { + if (decl.link.local_sym_index != 0) return; + + try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); + try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); + const local_sym_index = self.local_symbols.items.len; + const offset_table_index = self.offset_table.items.len; + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + + self.local_symbols.appendAssumeCapacity(.{ + .st_name = 0, + .st_info = 0, + .st_other = 0, + .st_shndx = 0, + .st_value = phdr.p_vaddr, + .st_size = 0, + }); + errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1); + self.offset_table.appendAssumeCapacity(0); + errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); + + self.offset_table_count_dirty = true; + + decl.link = .{ + .local_sym_index = @intCast(u32, local_sym_index), + .offset_table_index = @intCast(u32, offset_table_index), + }; + } + pub fn updateDecl(self: *ElfFile, module: *ir.Module, decl: *ir.Module.Decl) !void { var code_buffer = std.ArrayList(u8).init(self.allocator); defer code_buffer.deinit(); @@ -781,21 +810,33 @@ pub const ElfFile = struct { if (decl.link.local_sym_index != 0) { const local_sym = &self.local_symbols.items[decl.link.local_sym_index]; const existing_block = self.findAllocatedTextBlock(local_sym.*); - const need_realloc = code.len > existing_block.size_capacity or + const need_realloc = local_sym.st_size == 0 or + code.len > existing_block.size_capacity or !mem.isAlignedGeneric(u64, local_sym.st_value, required_alignment); + // TODO check for collision with another symbol const file_offset = if (need_realloc) fo: { const new_block = try self.allocateTextBlock(code.len, required_alignment); local_sym.st_value = new_block.vaddr; - local_sym.st_size = code.len; + self.offset_table.items[decl.link.offset_table_index] = new_block.vaddr; + //std.debug.warn("{}: writing got index {}=0x{x}\n", .{ + // decl.name, + // decl.link.offset_table_index, + // self.offset_table.items[decl.link.offset_table_index], + //}); try self.writeOffsetTableEntry(decl.link.offset_table_index); break :fo new_block.file_offset; } else existing_block.file_offset; + local_sym.st_size = code.len; local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(decl.name)); local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits; + local_sym.st_other = 0; + local_sym.st_shndx = self.text_section_index.?; // TODO this write could be avoided if no fields of the symbol were changed. 
try self.writeSymbol(decl.link.local_sym_index); + + //std.debug.warn("updating {} at vaddr 0x{x}\n", .{ decl.name, local_sym.st_value }); break :blk file_offset; } else { try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); @@ -829,6 +870,7 @@ pub const ElfFile = struct { .offset_table_index = @intCast(u32, offset_table_index), }; + //std.debug.warn("writing new {} at vaddr 0x{x}\n", .{ decl.name, new_block.vaddr }); break :blk new_block.file_offset; } }; From e1d4b59c5bd47c7bef3e41279c97cd36991676e2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 15 May 2020 15:20:42 -0400 Subject: [PATCH 13/31] self-hosted: update main.zig After this commit there are no more bit rotted files. The testing program that was in ir.zig has been moved to main.zig. Unsupported command line options have been deleted, or error messages added. The compiler repl is available from the build-exe, build-lib, build-obj commands with the --watch option. The main zig build script now builds the self-hosted compiler unconditionally. Linking against LLVM is behind a -Denable-llvm flag that defaults to off. --- build.zig | 15 +- src-self-hosted/compilation.zig | 1457 ------------------------------- src-self-hosted/errmsg.zig | 284 ------ src-self-hosted/ir.zig | 99 --- src-self-hosted/ir/text.zig | 5 +- src-self-hosted/main.zig | 941 ++++++++++---------- src-self-hosted/stage2.zig | 254 +----- 7 files changed, 450 insertions(+), 2605 deletions(-) delete mode 100644 src-self-hosted/compilation.zig delete mode 100644 src-self-hosted/errmsg.zig diff --git a/build.zig b/build.zig index ab1d985b74..b57ae69638 100644 --- a/build.zig +++ b/build.zig @@ -51,6 +51,9 @@ pub fn build(b: *Builder) !void { var exe = b.addExecutable("zig", "src-self-hosted/main.zig"); exe.setBuildMode(mode); + test_step.dependOn(&exe.step); + b.default_step.dependOn(&exe.step); + exe.install(); const skip_release = b.option(bool, "skip-release", "Main test suite skips release builds") orelse false; const skip_release_small = b.option(bool, "skip-release-small", "Main test suite skips release-small builds") orelse skip_release; @@ -58,21 +61,17 @@ pub fn build(b: *Builder) !void { const skip_release_safe = b.option(bool, "skip-release-safe", "Main test suite skips release-safe builds") orelse skip_release; const skip_non_native = b.option(bool, "skip-non-native", "Main test suite skips non-native builds") orelse false; const skip_libc = b.option(bool, "skip-libc", "Main test suite skips tests that link libc") orelse false; - const skip_self_hosted = (b.option(bool, "skip-self-hosted", "Main test suite skips building self hosted compiler") orelse false) or true; // TODO evented I/O good enough that this passes everywhere - if (!skip_self_hosted) { - test_step.dependOn(&exe.step); - } const only_install_lib_files = b.option(bool, "lib-files-only", "Only install library files") orelse false; - if (!only_install_lib_files and !skip_self_hosted) { + const enable_llvm = b.option(bool, "enable-llvm", "Build self-hosted compiler with LLVM backend enabled") orelse false; + if (enable_llvm) { var ctx = parseConfigH(b, config_h_text); ctx.llvm = try findLLVM(b, ctx.llvm_config_exe); try configureStage2(b, exe, ctx); - - b.default_step.dependOn(&exe.step); - exe.install(); } + const link_libc = b.option(bool, "force-link-libc", "Force self-hosted compiler to link libc") orelse false; + if (link_libc) exe.linkLibC(); b.installDirectory(InstallDirectoryOptions{ .source_dir = "lib", diff --git a/src-self-hosted/compilation.zig
b/src-self-hosted/compilation.zig deleted file mode 100644 index 75be005c83..0000000000 --- a/src-self-hosted/compilation.zig +++ /dev/null @@ -1,1457 +0,0 @@ -const std = @import("std"); -const io = std.io; -const mem = std.mem; -const Allocator = mem.Allocator; -const ArrayListSentineled = std.ArrayListSentineled; -const llvm = @import("llvm.zig"); -const c = @import("c.zig"); -const builtin = std.builtin; -const Target = std.Target; -const warn = std.debug.warn; -const Token = std.zig.Token; -const ArrayList = std.ArrayList; -const errmsg = @import("errmsg.zig"); -const ast = std.zig.ast; -const event = std.event; -const assert = std.debug.assert; -const AtomicRmwOp = builtin.AtomicRmwOp; -const AtomicOrder = builtin.AtomicOrder; -const Scope = @import("scope.zig").Scope; -const Decl = @import("decl.zig").Decl; -const ir = @import("ir.zig"); -const Value = @import("value.zig").Value; -const Type = Value.Type; -const Span = errmsg.Span; -const Msg = errmsg.Msg; -const codegen = @import("codegen.zig"); -const Package = @import("package.zig").Package; -const link = @import("link.zig").link; -const LibCInstallation = @import("libc_installation.zig").LibCInstallation; -const CInt = @import("c_int.zig").CInt; -const fs = std.fs; - -pub const Visib = enum { - Private, - Pub, -}; - -const max_src_size = 2 * 1024 * 1024 * 1024; // 2 GiB - -/// Data that is local to the event loop. -pub const ZigCompiler = struct { - llvm_handle_pool: std.atomic.Stack(*llvm.Context), - lld_lock: event.Lock, - allocator: *Allocator, - - /// TODO pool these so that it doesn't have to lock - prng: event.Locked(std.rand.DefaultPrng), - - native_libc: event.Future(LibCInstallation), - - var lazy_init_targets = std.once(initializeAllTargets); - - pub fn init(allocator: *Allocator) !ZigCompiler { - lazy_init_targets.call(); - - var seed_bytes: [@sizeOf(u64)]u8 = undefined; - try std.crypto.randomBytes(seed_bytes[0..]); - const seed = mem.readIntNative(u64, &seed_bytes); - - return ZigCompiler{ - .allocator = allocator, - .lld_lock = event.Lock.init(), - .llvm_handle_pool = std.atomic.Stack(*llvm.Context).init(), - .prng = event.Locked(std.rand.DefaultPrng).init(std.rand.DefaultPrng.init(seed)), - .native_libc = event.Future(LibCInstallation).init(), - }; - } - - /// Must be called only after EventLoop.run completes. - fn deinit(self: *ZigCompiler) void { - self.lld_lock.deinit(); - while (self.llvm_handle_pool.pop()) |node| { - llvm.ContextDispose(node.data); - self.allocator.destroy(node); - } - } - - /// Gets an exclusive handle on any LlvmContext. - /// Caller must release the handle when done. - pub fn getAnyLlvmContext(self: *ZigCompiler) !LlvmHandle { - if (self.llvm_handle_pool.pop()) |node| return LlvmHandle{ .node = node }; - - const context_ref = llvm.ContextCreate() orelse return error.OutOfMemory; - errdefer llvm.ContextDispose(context_ref); - - const node = try self.allocator.create(std.atomic.Stack(*llvm.Context).Node); - node.* = std.atomic.Stack(*llvm.Context).Node{ - .next = undefined, - .data = context_ref, - }; - errdefer self.allocator.destroy(node); - - return LlvmHandle{ .node = node }; - } - - pub fn getNativeLibC(self: *ZigCompiler) !*LibCInstallation { - if (self.native_libc.start()) |ptr| return ptr; - self.native_libc.data = try LibCInstallation.findNative(.{ .allocator = self.allocator }); - self.native_libc.resolve(); - return &self.native_libc.data; - } - - /// Must be called only once, ever. Sets global state. 
- pub fn setLlvmArgv(allocator: *Allocator, llvm_argv: []const []const u8) !void { - if (llvm_argv.len != 0) { - var c_compatible_args = try std.cstr.NullTerminated2DArray.fromSlices(allocator, &[_][]const []const u8{ - &[_][]const u8{"zig (LLVM option parsing)"}, - llvm_argv, - }); - defer c_compatible_args.deinit(); - c.ZigLLVMParseCommandLineOptions(llvm_argv.len + 1, c_compatible_args.ptr); - } - } -}; - -pub const LlvmHandle = struct { - node: *std.atomic.Stack(*llvm.Context).Node, - - pub fn release(self: LlvmHandle, zig_compiler: *ZigCompiler) void { - zig_compiler.llvm_handle_pool.push(self.node); - } -}; - -pub const Compilation = struct { - pub const FnLinkSet = std.TailQueue(?*Value.Fn); - - zig_compiler: *ZigCompiler, - name: ArrayListSentineled(u8, 0), - llvm_triple: ArrayListSentineled(u8, 0), - root_src_path: ?[]const u8, - target: std.Target, - llvm_target: *llvm.Target, - build_mode: builtin.Mode, - zig_lib_dir: []const u8, - zig_std_dir: []const u8, - - /// lazily created when we need it - tmp_dir: event.Future(BuildError![]u8) = event.Future(BuildError![]u8).init(), - - version: builtin.Version = builtin.Version{ .major = 0, .minor = 0, .patch = 0 }, - - linker_script: ?[]const u8 = null, - out_h_path: ?[]const u8 = null, - - is_test: bool = false, - strip: bool = false, - is_static: bool, - linker_rdynamic: bool = false, - - clang_argv: []const []const u8 = &[_][]const u8{}, - assembly_files: []const []const u8 = &[_][]const u8{}, - - /// paths that are explicitly provided by the user to link against - link_objects: []const []const u8 = &[_][]const u8{}, - - /// functions that have their own objects that we need to link - /// it uses an optional pointer so that tombstone removals are possible - fn_link_set: event.Locked(FnLinkSet) = event.Locked(FnLinkSet).init(FnLinkSet.init()), - - link_libs_list: ArrayList(*LinkLib), - libc_link_lib: ?*LinkLib = null, - - err_color: errmsg.Color = .Auto, - - verbose_tokenize: bool = false, - verbose_ast_tree: bool = false, - verbose_ast_fmt: bool = false, - verbose_cimport: bool = false, - verbose_ir: bool = false, - verbose_llvm_ir: bool = false, - verbose_link: bool = false, - - link_eh_frame_hdr: bool = false, - - darwin_version_min: DarwinVersionMin = .None, - - test_filters: []const []const u8 = &[_][]const u8{}, - test_name_prefix: ?[]const u8 = null, - - emit_bin: bool = true, - emit_asm: bool = false, - emit_llvm_ir: bool = false, - emit_h: bool = false, - - kind: Kind, - - events: *event.Channel(Event), - - exported_symbol_names: event.Locked(Decl.Table), - - /// Before code generation starts, must wait on this group to make sure - /// the build is complete. 
- prelink_group: event.Group(BuildError!void), - - compile_errors: event.Locked(CompileErrList), - - meta_type: *Type.MetaType, - void_type: *Type.Void, - bool_type: *Type.Bool, - noreturn_type: *Type.NoReturn, - comptime_int_type: *Type.ComptimeInt, - u8_type: *Type.Int, - - void_value: *Value.Void, - true_value: *Value.Bool, - false_value: *Value.Bool, - noreturn_value: *Value.NoReturn, - - target_machine: *llvm.TargetMachine, - target_data_ref: *llvm.TargetData, - target_layout_str: [*:0]u8, - target_ptr_bits: u32, - - /// for allocating things which have the same lifetime as this Compilation - arena_allocator: std.heap.ArenaAllocator, - - root_package: *Package, - std_package: *Package, - - override_libc: ?*LibCInstallation = null, - - /// need to wait on this group before deinitializing - deinit_group: event.Group(void), - - destroy_frame: *@Frame(createAsync), - main_loop_frame: *@Frame(Compilation.mainLoop), - main_loop_future: event.Future(void) = event.Future(void).init(), - - have_err_ret_tracing: bool = false, - - /// not locked because it is read-only - primitive_type_table: TypeTable, - - int_type_table: event.Locked(IntTypeTable), - array_type_table: event.Locked(ArrayTypeTable), - ptr_type_table: event.Locked(PtrTypeTable), - fn_type_table: event.Locked(FnTypeTable), - - c_int_types: [CInt.list.len]*Type.Int, - - fs_watch: *fs.Watch(*Scope.Root), - - cancelled: bool = false, - - const IntTypeTable = std.HashMap(*const Type.Int.Key, *Type.Int, Type.Int.Key.hash, Type.Int.Key.eql); - const ArrayTypeTable = std.HashMap(*const Type.Array.Key, *Type.Array, Type.Array.Key.hash, Type.Array.Key.eql); - const PtrTypeTable = std.HashMap(*const Type.Pointer.Key, *Type.Pointer, Type.Pointer.Key.hash, Type.Pointer.Key.eql); - const FnTypeTable = std.HashMap(*const Type.Fn.Key, *Type.Fn, Type.Fn.Key.hash, Type.Fn.Key.eql); - const TypeTable = std.StringHashMap(*Type); - - const CompileErrList = std.ArrayList(*Msg); - - // TODO handle some of these earlier and report them in a way other than error codes - pub const BuildError = error{ - OutOfMemory, - EndOfStream, - IsDir, - Unexpected, - SystemResources, - SharingViolation, - PathAlreadyExists, - FileNotFound, - AccessDenied, - PipeBusy, - FileTooBig, - SymLinkLoop, - ProcessFdQuotaExceeded, - NameTooLong, - SystemFdQuotaExceeded, - NoDevice, - NoSpaceLeft, - NotDir, - FileSystem, - OperationAborted, - IoPending, - BrokenPipe, - WouldBlock, - FileClosed, - DestinationAddressRequired, - DiskQuota, - InputOutput, - NoStdHandles, - Overflow, - NotSupported, - BufferTooSmall, - Unimplemented, // TODO remove this one - SemanticAnalysisFailed, // TODO remove this one - ReadOnlyFileSystem, - LinkQuotaExceeded, - EnvironmentVariableNotFound, - AppDataDirUnavailable, - LinkFailed, - LibCRequiredButNotProvidedOrFound, - LibCMissingDynamicLinker, - InvalidDarwinVersionString, - UnsupportedLinkArchitecture, - UserResourceLimitReached, - InvalidUtf8, - BadPathName, - DeviceBusy, - CurrentWorkingDirectoryUnlinked, - }; - - pub const Event = union(enum) { - Ok, - Error: BuildError, - Fail: []*Msg, - }; - - pub const DarwinVersionMin = union(enum) { - None, - MacOS: []const u8, - Ios: []const u8, - }; - - pub const Kind = enum { - Exe, - Lib, - Obj, - }; - - pub const LinkLib = struct { - name: []const u8, - path: ?[]const u8, - - /// the list of symbols we depend on from this lib - symbols: ArrayList([]u8), - provided_explicitly: bool, - }; - - pub const Emit = enum { - Binary, - Assembly, - LlvmIr, - }; - - pub fn create( - zig_compiler: *ZigCompiler, 
- name: []const u8, - root_src_path: ?[]const u8, - target: std.zig.CrossTarget, - kind: Kind, - build_mode: builtin.Mode, - is_static: bool, - zig_lib_dir: []const u8, - ) !*Compilation { - var optional_comp: ?*Compilation = null; - var frame = try zig_compiler.allocator.create(@Frame(createAsync)); - errdefer zig_compiler.allocator.destroy(frame); - frame.* = async createAsync( - &optional_comp, - zig_compiler, - name, - root_src_path, - target, - kind, - build_mode, - is_static, - zig_lib_dir, - ); - // TODO causes segfault - // return optional_comp orelse if (await frame) |_| unreachable else |err| err; - if (optional_comp) |comp| { - return comp; - } else if (await frame) |_| unreachable else |err| return err; - } - fn createAsync( - out_comp: *?*Compilation, - zig_compiler: *ZigCompiler, - name: []const u8, - root_src_path: ?[]const u8, - cross_target: std.zig.CrossTarget, - kind: Kind, - build_mode: builtin.Mode, - is_static: bool, - zig_lib_dir: []const u8, - ) callconv(.Async) !void { - const allocator = zig_compiler.allocator; - - // TODO merge this line with stage2.zig crossTargetToTarget - const target_info = try std.zig.system.NativeTargetInfo.detect(std.heap.c_allocator, cross_target); - const target = target_info.target; - - var comp = Compilation{ - .arena_allocator = std.heap.ArenaAllocator.init(allocator), - .zig_compiler = zig_compiler, - .events = undefined, - .root_src_path = root_src_path, - .target = target, - .llvm_target = undefined, - .kind = kind, - .build_mode = build_mode, - .zig_lib_dir = zig_lib_dir, - .zig_std_dir = undefined, - .destroy_frame = @frame(), - .main_loop_frame = undefined, - - .name = undefined, - .llvm_triple = undefined, - .is_static = is_static, - .link_libs_list = undefined, - .exported_symbol_names = event.Locked(Decl.Table).init(Decl.Table.init(allocator)), - .prelink_group = event.Group(BuildError!void).init(allocator), - .deinit_group = event.Group(void).init(allocator), - .compile_errors = event.Locked(CompileErrList).init(CompileErrList.init(allocator)), - .int_type_table = event.Locked(IntTypeTable).init(IntTypeTable.init(allocator)), - .array_type_table = event.Locked(ArrayTypeTable).init(ArrayTypeTable.init(allocator)), - .ptr_type_table = event.Locked(PtrTypeTable).init(PtrTypeTable.init(allocator)), - .fn_type_table = event.Locked(FnTypeTable).init(FnTypeTable.init(allocator)), - .c_int_types = undefined, - - .meta_type = undefined, - .void_type = undefined, - .void_value = undefined, - .bool_type = undefined, - .true_value = undefined, - .false_value = undefined, - .noreturn_type = undefined, - .noreturn_value = undefined, - .comptime_int_type = undefined, - .u8_type = undefined, - - .target_machine = undefined, - .target_data_ref = undefined, - .target_layout_str = undefined, - .target_ptr_bits = target.cpu.arch.ptrBitWidth(), - - .root_package = undefined, - .std_package = undefined, - - .primitive_type_table = undefined, - - .fs_watch = undefined, - }; - comp.link_libs_list = ArrayList(*LinkLib).init(comp.arena()); - comp.primitive_type_table = TypeTable.init(comp.arena()); - - defer { - comp.int_type_table.private_data.deinit(); - comp.array_type_table.private_data.deinit(); - comp.ptr_type_table.private_data.deinit(); - comp.fn_type_table.private_data.deinit(); - comp.arena_allocator.deinit(); - } - - comp.name = try ArrayListSentineled(u8, 0).init(comp.arena(), name); - comp.llvm_triple = try getLLVMTriple(comp.arena(), target); - comp.llvm_target = try llvmTargetFromTriple(comp.llvm_triple); - comp.zig_std_dir = try 
fs.path.join(comp.arena(), &[_][]const u8{ zig_lib_dir, "std" }); - const opt_level = switch (build_mode) { - .Debug => llvm.CodeGenLevelNone, - else => llvm.CodeGenLevelAggressive, - }; - const reloc_mode = if (is_static) llvm.RelocStatic else llvm.RelocPIC; - var target_specific_cpu_args: ?[*:0]u8 = null; - var target_specific_cpu_features: ?[*:0]u8 = null; - defer llvm.DisposeMessage(target_specific_cpu_args); - defer llvm.DisposeMessage(target_specific_cpu_features); - // TODO detect native CPU & features here - comp.target_machine = llvm.CreateTargetMachine( - comp.llvm_target, - comp.llvm_triple.span(), - target_specific_cpu_args orelse "", - target_specific_cpu_features orelse "", - opt_level, - reloc_mode, - llvm.CodeModelDefault, - false, // TODO: add -ffunction-sections option - ) orelse return error.OutOfMemory; - defer llvm.DisposeTargetMachine(comp.target_machine); - comp.target_data_ref = llvm.CreateTargetDataLayout(comp.target_machine) orelse return error.OutOfMemory; - defer llvm.DisposeTargetData(comp.target_data_ref); - comp.target_layout_str = llvm.CopyStringRepOfTargetData(comp.target_data_ref) orelse return error.OutOfMemory; - defer llvm.DisposeMessage(comp.target_layout_str); - comp.events = try allocator.create(event.Channel(Event)); - defer allocator.destroy(comp.events); - comp.events.init(&[0]Event{}); - defer comp.events.deinit(); - if (root_src_path) |root_src| { - const dirname = fs.path.dirname(root_src) orelse "."; - const basename = fs.path.basename(root_src); - comp.root_package = try Package.create(comp.arena(), dirname, basename); - comp.std_package = try Package.create(comp.arena(), comp.zig_std_dir, "std.zig"); - try comp.root_package.add("std", comp.std_package); - } else { - comp.root_package = try Package.create(comp.arena(), ".", ""); - } - comp.fs_watch = try fs.Watch(*Scope.Root).init(allocator, 16); - defer comp.fs_watch.deinit(); - try comp.initTypes(); - defer comp.primitive_type_table.deinit(); - comp.main_loop_frame = try allocator.create(@Frame(mainLoop)); - defer allocator.destroy(comp.main_loop_frame); - comp.main_loop_frame.* = async comp.mainLoop(); - // Set this to indicate that initialization completed successfully. - // from here on out we must not return an error. - // This must occur before the first suspend/await. - out_comp.* = &comp; - // This suspend is resumed by destroy() - suspend; - // From here on is cleanup.
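// (Frame lifecycle: createAsync parks on the suspend above after publishing
// out_comp; destroy() later sets cancelled, awaits the main loop frame, and
// resumes this frame, so cleanup runs inside the async frame that owns all of
// the defers.)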
- - comp.deinit_group.wait(); - - if (comp.tmp_dir.getOrNull()) |tmp_dir_result| - if (tmp_dir_result.*) |tmp_dir| { - fs.cwd().deleteTree(tmp_dir) catch {}; - } else |_| {}; - } - - /// it does ref the result because it could be an arbitrary integer size - pub fn getPrimitiveType(comp: *Compilation, name: []const u8) !?*Type { - if (name.len >= 2) { - switch (name[0]) { - 'i', 'u' => blk: { - for (name[1..]) |byte| - switch (byte) { - '0'...'9' => {}, - else => break :blk, - }; - const is_signed = name[0] == 'i'; - const bit_count = std.fmt.parseUnsigned(u32, name[1..], 10) catch |err| switch (err) { - error.Overflow => return error.Overflow, - error.InvalidCharacter => unreachable, // we just checked the characters above - }; - const int_type = try Type.Int.get(comp, Type.Int.Key{ - .bit_count = bit_count, - .is_signed = is_signed, - }); - errdefer int_type.base.base.deref(); - return &int_type.base; - }, - else => {}, - } - } - - if (comp.primitive_type_table.get(name)) |entry| { - entry.value.base.ref(); - return entry.value; - } - - return null; - } - - fn initTypes(comp: *Compilation) !void { - comp.meta_type = try comp.arena().create(Type.MetaType); - comp.meta_type.* = Type.MetaType{ - .base = Type{ - .name = "type", - .base = Value{ - .id = .Type, - .typ = undefined, - .ref_count = std.atomic.Int(usize).init(3), // 3 because it references itself twice - }, - .id = .Type, - .abi_alignment = Type.AbiAlignment.init(), - }, - .value = undefined, - }; - comp.meta_type.value = &comp.meta_type.base; - comp.meta_type.base.base.typ = &comp.meta_type.base; - assert((try comp.primitive_type_table.put(comp.meta_type.base.name, &comp.meta_type.base)) == null); - - comp.void_type = try comp.arena().create(Type.Void); - comp.void_type.* = Type.Void{ - .base = Type{ - .name = "void", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Void, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.void_type.base.name, &comp.void_type.base)) == null); - - comp.noreturn_type = try comp.arena().create(Type.NoReturn); - comp.noreturn_type.* = Type.NoReturn{ - .base = Type{ - .name = "noreturn", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .NoReturn, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.noreturn_type.base.name, &comp.noreturn_type.base)) == null); - - comp.comptime_int_type = try comp.arena().create(Type.ComptimeInt); - comp.comptime_int_type.* = Type.ComptimeInt{ - .base = Type{ - .name = "comptime_int", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .ComptimeInt, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.comptime_int_type.base.name, &comp.comptime_int_type.base)) == null); - - comp.bool_type = try comp.arena().create(Type.Bool); - comp.bool_type.* = Type.Bool{ - .base = Type{ - .name = "bool", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Bool, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.bool_type.base.name, &comp.bool_type.base)) == null); - - comp.void_value = try comp.arena().create(Value.Void); - comp.void_value.* = Value.Void{ - .base 
= Value{ - .id = .Void, - .typ = &Type.Void.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - }; - - comp.true_value = try comp.arena().create(Value.Bool); - comp.true_value.* = Value.Bool{ - .base = Value{ - .id = .Bool, - .typ = &Type.Bool.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .x = true, - }; - - comp.false_value = try comp.arena().create(Value.Bool); - comp.false_value.* = Value.Bool{ - .base = Value{ - .id = .Bool, - .typ = &Type.Bool.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .x = false, - }; - - comp.noreturn_value = try comp.arena().create(Value.NoReturn); - comp.noreturn_value.* = Value.NoReturn{ - .base = Value{ - .id = .NoReturn, - .typ = &Type.NoReturn.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - }; - - for (CInt.list) |cint, i| { - const c_int_type = try comp.arena().create(Type.Int); - c_int_type.* = Type.Int{ - .base = Type{ - .name = cint.zig_name, - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Int, - .abi_alignment = Type.AbiAlignment.init(), - }, - .key = Type.Int.Key{ - .is_signed = cint.is_signed, - .bit_count = cint.sizeInBits(comp.target), - }, - .garbage_node = undefined, - }; - comp.c_int_types[i] = c_int_type; - assert((try comp.primitive_type_table.put(cint.zig_name, &c_int_type.base)) == null); - } - comp.u8_type = try comp.arena().create(Type.Int); - comp.u8_type.* = Type.Int{ - .base = Type{ - .name = "u8", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Int, - .abi_alignment = Type.AbiAlignment.init(), - }, - .key = Type.Int.Key{ - .is_signed = false, - .bit_count = 8, - }, - .garbage_node = undefined, - }; - assert((try comp.primitive_type_table.put(comp.u8_type.base.name, &comp.u8_type.base)) == null); - } - - pub fn destroy(self: *Compilation) void { - const allocator = self.gpa(); - self.cancelled = true; - await self.main_loop_frame; - resume self.destroy_frame; - allocator.destroy(self.destroy_frame); - } - - fn start(self: *Compilation) void { - self.main_loop_future.resolve(); - } - fn mainLoop(self: *Compilation) callconv(.Async) void { - // wait until start() is called - _ = self.main_loop_future.get(); - - var build_result = self.initialCompile(); - - while (!self.cancelled) { - const link_result = if (build_result) blk: { - break :blk self.maybeLink(); - } else |err| err; - // this makes a handy error return trace and stack trace in debug mode - if (std.debug.runtime_safety) { - link_result catch unreachable; - } - - const compile_errors = blk: { - const held = self.compile_errors.acquire(); - defer held.release(); - break :blk held.value.toOwnedSlice(); - }; - - if (link_result) |_| { - if (compile_errors.len == 0) { - self.events.put(Event.Ok); - } else { - self.events.put(Event{ .Fail = compile_errors }); - } - } else |err| { - // if there's an error then the compile errors have dangling references - self.gpa().free(compile_errors); - - self.events.put(Event{ .Error = err }); - } - - // First, get an item from the watch channel, waiting on the channel. 
- var group = event.Group(BuildError!void).init(self.gpa()); - { - const ev = (self.fs_watch.channel.get()) catch |err| { - build_result = err; - continue; - }; - const root_scope = ev.data; - group.call(rebuildFile, .{ self, root_scope }) catch |err| { - build_result = err; - continue; - }; - } - // Next, get all the items from the channel that are buffered up. - while (self.fs_watch.channel.getOrNull()) |ev_or_err| { - if (ev_or_err) |ev| { - const root_scope = ev.data; - group.call(rebuildFile, .{ self, root_scope }) catch |err| { - build_result = err; - continue; - }; - } else |err| { - build_result = err; - continue; - } - } - build_result = group.wait(); - } - } - fn rebuildFile(self: *Compilation, root_scope: *Scope.Root) callconv(.Async) BuildError!void { - const tree_scope = blk: { - const source_code = fs.cwd().readFileAlloc( - self.gpa(), - root_scope.realpath, - max_src_size, - ) catch |err| { - try self.addCompileErrorCli(root_scope.realpath, "unable to open: {}", .{@errorName(err)}); - return; - }; - errdefer self.gpa().free(source_code); - - const tree = try std.zig.parse(self.gpa(), source_code); - errdefer { - tree.deinit(); - } - - break :blk try Scope.AstTree.create(self, tree, root_scope); - }; - defer tree_scope.base.deref(self); - - var error_it = tree_scope.tree.errors.iterator(0); - while (error_it.next()) |parse_error| { - const msg = try Msg.createFromParseErrorAndScope(self, tree_scope, parse_error); - errdefer msg.destroy(); - - try self.addCompileErrorAsync(msg); - } - if (tree_scope.tree.errors.len != 0) { - return; - } - - const locked_table = root_scope.decls.table.acquireWrite(); - defer locked_table.release(); - - var decl_group = event.Group(BuildError!void).init(self.gpa()); - - try self.rebuildChangedDecls( - &decl_group, - locked_table.value, - root_scope.decls, - &tree_scope.tree.root_node.decls, - tree_scope, - ); - - try decl_group.wait(); - } - - fn rebuildChangedDecls( - self: *Compilation, - group: *event.Group(BuildError!void), - locked_table: *Decl.Table, - decl_scope: *Scope.Decls, - ast_decls: *ast.Node.Root.DeclList, - tree_scope: *Scope.AstTree, - ) !void { - var existing_decls = try locked_table.clone(); - defer existing_decls.deinit(); - - var ast_it = ast_decls.iterator(0); - while (ast_it.next()) |decl_ptr| { - const decl = decl_ptr.*; - switch (decl.id) { - .Comptime => { - const comptime_node = @fieldParentPtr(ast.Node.Comptime, "base", decl); - - // TODO connect existing comptime decls to updated source files - - try self.prelink_group.call(addCompTimeBlock, .{ self, tree_scope, &decl_scope.base, comptime_node }); - }, - .VarDecl => @panic("TODO"), - .FnProto => { - const fn_proto = @fieldParentPtr(ast.Node.FnProto, "base", decl); - - const name = if (fn_proto.name_token) |name_token| tree_scope.tree.tokenSlice(name_token) else { - try self.addCompileError(tree_scope, Span{ - .first = fn_proto.fn_token, - .last = fn_proto.fn_token + 1, - }, "missing function name", .{}); - continue; - }; - - if (existing_decls.remove(name)) |entry| { - // compare new code to existing - if (entry.value.cast(Decl.Fn)) |existing_fn_decl| { - // Just compare the old bytes to the new bytes of the top level decl. - // Even if the AST is technically the same, we want error messages to display - // from the most recent source. 
- const old_decl_src = existing_fn_decl.base.tree_scope.tree.getNodeSource( - &existing_fn_decl.fn_proto.base, - ); - const new_decl_src = tree_scope.tree.getNodeSource(&fn_proto.base); - if (mem.eql(u8, old_decl_src, new_decl_src)) { - // it's the same, we can skip this decl - continue; - } else { - @panic("TODO decl changed implementation"); - // Add the new thing before dereferencing the old thing. This way we don't end - // up pointlessly re-creating things we end up using in the new thing. - } - } else { - @panic("TODO decl changed kind"); - } - } else { - // add new decl - const fn_decl = try self.gpa().create(Decl.Fn); - fn_decl.* = Decl.Fn{ - .base = Decl{ - .id = Decl.Id.Fn, - .name = name, - .visib = parseVisibToken(tree_scope.tree, fn_proto.visib_token), - .resolution = event.Future(BuildError!void).init(), - .parent_scope = &decl_scope.base, - .tree_scope = tree_scope, - }, - .value = .Unresolved, - .fn_proto = fn_proto, - }; - tree_scope.base.ref(); - errdefer self.gpa().destroy(fn_decl); - - try group.call(addTopLevelDecl, .{ self, &fn_decl.base, locked_table }); - } - }, - .TestDecl => @panic("TODO"), - else => unreachable, - } - } - - var existing_decl_it = existing_decls.iterator(); - while (existing_decl_it.next()) |entry| { - // this decl was deleted - const existing_decl = entry.value; - @panic("TODO handle decl deletion"); - } - } - - fn initialCompile(self: *Compilation) !void { - if (self.root_src_path) |root_src_path| { - const root_scope = blk: { - // TODO async/await fs.realpath - const root_src_real_path = fs.realpathAlloc(self.gpa(), root_src_path) catch |err| { - try self.addCompileErrorCli(root_src_path, "unable to open: {}", .{@errorName(err)}); - return; - }; - errdefer self.gpa().free(root_src_real_path); - - break :blk try Scope.Root.create(self, root_src_real_path); - }; - defer root_scope.base.deref(self); - - // assert((try self.fs_watch.addFile(root_scope.realpath, root_scope)) == null); - try self.rebuildFile(root_scope); - } - } - - fn maybeLink(self: *Compilation) !void { - (self.prelink_group.wait()) catch |err| switch (err) { - error.SemanticAnalysisFailed => {}, - else => return err, - }; - - const any_prelink_errors = blk: { - const compile_errors = self.compile_errors.acquire(); - defer compile_errors.release(); - - break :blk compile_errors.value.len != 0; - }; - - if (!any_prelink_errors) { - try link(self); - } - } - /// caller takes ownership of resulting Code - fn genAndAnalyzeCode( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - node: *ast.Node, - expected_type: ?*Type, - ) callconv(.Async) !*ir.Code { - const unanalyzed_code = try ir.gen( - comp, - node, - tree_scope, - scope, - ); - defer unanalyzed_code.destroy(comp.gpa()); - - if (comp.verbose_ir) { - std.debug.warn("unanalyzed:\n", .{}); - unanalyzed_code.dump(); - } - - const analyzed_code = try ir.analyze( - comp, - unanalyzed_code, - expected_type, - ); - errdefer analyzed_code.destroy(comp.gpa()); - - if (comp.verbose_ir) { - std.debug.warn("analyzed:\n", .{}); - analyzed_code.dump(); - } - - return analyzed_code; - } - fn addCompTimeBlock( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - comptime_node: *ast.Node.Comptime, - ) callconv(.Async) BuildError!void { - const void_type = Type.Void.get(comp); - defer void_type.base.base.deref(comp); - - const analyzed_code = genAndAnalyzeCode( - comp, - tree_scope, - scope, - comptime_node.expr, - &void_type.base, - ) catch |err| switch (err) { - // This poison value should not cause the 
errdefers to run. It simply means - // that comp.compile_errors is populated. - error.SemanticAnalysisFailed => return {}, - else => return err, - }; - analyzed_code.destroy(comp.gpa()); - } - fn addTopLevelDecl( - self: *Compilation, - decl: *Decl, - locked_table: *Decl.Table, - ) callconv(.Async) BuildError!void { - const is_export = decl.isExported(decl.tree_scope.tree); - - if (is_export) { - try self.prelink_group.call(verifyUniqueSymbol, .{ self, decl }); - try self.prelink_group.call(resolveDecl, .{ self, decl }); - } - - const gop = try locked_table.getOrPut(decl.name); - if (gop.found_existing) { - try self.addCompileError(decl.tree_scope, decl.getSpan(), "redefinition of '{}'", .{decl.name}); - // TODO note: other definition here - } else { - gop.kv.value = decl; - } - } - - fn addCompileError(self: *Compilation, tree_scope: *Scope.AstTree, span: Span, comptime fmt: []const u8, args: var) !void { - const text = try std.fmt.allocPrint(self.gpa(), fmt, args); - errdefer self.gpa().free(text); - - const msg = try Msg.createFromScope(self, tree_scope, span, text); - errdefer msg.destroy(); - - try self.prelink_group.call(addCompileErrorAsync, .{ self, msg }); - } - - fn addCompileErrorCli(self: *Compilation, realpath: []const u8, comptime fmt: []const u8, args: var) !void { - const text = try std.fmt.allocPrint(self.gpa(), fmt, args); - errdefer self.gpa().free(text); - - const msg = try Msg.createFromCli(self, realpath, text); - errdefer msg.destroy(); - - try self.prelink_group.call(addCompileErrorAsync, .{ self, msg }); - } - fn addCompileErrorAsync( - self: *Compilation, - msg: *Msg, - ) callconv(.Async) BuildError!void { - errdefer msg.destroy(); - - const compile_errors = self.compile_errors.acquire(); - defer compile_errors.release(); - - try compile_errors.value.append(msg); - } - fn verifyUniqueSymbol(self: *Compilation, decl: *Decl) callconv(.Async) BuildError!void { - const exported_symbol_names = self.exported_symbol_names.acquire(); - defer exported_symbol_names.release(); - - if (try exported_symbol_names.value.put(decl.name, decl)) |other_decl| { - try self.addCompileError(decl.tree_scope, decl.getSpan(), "exported symbol collision: '{}'", .{ - decl.name, - }); - // TODO add error note showing location of other symbol - } - } - - pub fn haveLibC(self: *Compilation) bool { - return self.libc_link_lib != null; - } - - pub fn addLinkLib(self: *Compilation, name: []const u8, provided_explicitly: bool) !*LinkLib { - const is_libc = mem.eql(u8, name, "c"); - - if (is_libc) { - if (self.libc_link_lib) |libc_link_lib| { - return libc_link_lib; - } - } - - for (self.link_libs_list.span()) |existing_lib| { - if (mem.eql(u8, name, existing_lib.name)) { - return existing_lib; - } - } - - const link_lib = try self.gpa().create(LinkLib); - link_lib.* = LinkLib{ - .name = name, - .path = null, - .provided_explicitly = provided_explicitly, - .symbols = ArrayList([]u8).init(self.gpa()), - }; - try self.link_libs_list.append(link_lib); - if (is_libc) { - self.libc_link_lib = link_lib; - - // get a head start on looking for the native libc - // TODO this is missing a bunch of logic related to whether the target is native - // and whether we can build libc - if (self.override_libc == null) { - try self.deinit_group.call(startFindingNativeLibC, .{self}); - } - } - return link_lib; - } - fn startFindingNativeLibC(self: *Compilation) callconv(.Async) void { - event.Loop.startCpuBoundOperation(); - // we don't care if it fails, we're just trying to kick off the future resolution - _ = 
self.zig_compiler.getNativeLibC() catch return; - } - - /// General Purpose Allocator. Must free when done. - fn gpa(self: Compilation) *mem.Allocator { - return self.zig_compiler.allocator; - } - - /// Arena Allocator. Automatically freed when the Compilation is destroyed. - fn arena(self: *Compilation) *mem.Allocator { - return &self.arena_allocator.allocator; - } - - /// If the temporary directory for this compilation has not been created, it creates it. - /// Then it creates a random file name in that dir and returns it. - pub fn createRandomOutputPath(self: *Compilation, suffix: []const u8) !ArrayListSentineled(u8, 0) { - const tmp_dir = try self.getTmpDir(); - const file_prefix = self.getRandomFileName(); - - const file_name = try std.fmt.allocPrint(self.gpa(), "{}{}", .{ file_prefix[0..], suffix }); - defer self.gpa().free(file_name); - - const full_path = try fs.path.join(self.gpa(), &[_][]const u8{ tmp_dir, file_name[0..] }); - errdefer self.gpa().free(full_path); - - return ArrayListSentineled(u8, 0).fromOwnedSlice(self.gpa(), full_path); - } - - /// If the temporary directory for this Compilation has not been created, creates it. - /// Then returns it. The directory is unique to this Compilation and cleaned up when - /// the Compilation deinitializes. - fn getTmpDir(self: *Compilation) ![]const u8 { - if (self.tmp_dir.start()) |ptr| return ptr.*; - self.tmp_dir.data = self.getTmpDirImpl(); - self.tmp_dir.resolve(); - return self.tmp_dir.data; - } - - fn getTmpDirImpl(self: *Compilation) ![]u8 { - const comp_dir_name = self.getRandomFileName(); - const zig_dir_path = try getZigDir(self.gpa()); - defer self.gpa().free(zig_dir_path); - - const tmp_dir = try fs.path.join(self.arena(), &[_][]const u8{ zig_dir_path, comp_dir_name[0..] }); - try fs.cwd().makePath(tmp_dir); - return tmp_dir; - } - - fn getRandomFileName(self: *Compilation) [12]u8 { - // here we replace the standard +/ with -_ so that it can be used in a file name - const b64_fs_encoder = std.base64.Base64Encoder.init( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", - std.base64.standard_pad_char, - ); - - var rand_bytes: [9]u8 = undefined; - - { - const held = self.zig_compiler.prng.acquire(); - defer held.release(); - - held.value.random.bytes(rand_bytes[0..]); - } - - var result: [12]u8 = undefined; - b64_fs_encoder.encode(result[0..], &rand_bytes); - return result; - } - - fn registerGarbage(comp: *Compilation, comptime T: type, node: *std.atomic.Stack(*T).Node) void { - // TODO put the garbage somewhere - } - - /// Returns a value which has been ref()'d once - fn analyzeConstValue( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - node: *ast.Node, - expected_type: *Type, - ) !*Value { - var frame = try comp.gpa().create(@Frame(genAndAnalyzeCode)); - defer comp.gpa().destroy(frame); - frame.* = async comp.genAndAnalyzeCode(tree_scope, scope, node, expected_type); - const analyzed_code = try await frame; - defer analyzed_code.destroy(comp.gpa()); - - return analyzed_code.getCompTimeResult(comp); - } - - fn analyzeTypeExpr(comp: *Compilation, tree_scope: *Scope.AstTree, scope: *Scope, node: *ast.Node) !*Type { - const meta_type = &Type.MetaType.get(comp).base; - defer meta_type.base.deref(comp); - - const result_val = try comp.analyzeConstValue(tree_scope, scope, node, meta_type); - errdefer result_val.base.deref(comp); - - return result_val.cast(Type).?; - } - - /// This declaration has been blessed as going into the final code generation. 
- pub fn resolveDecl(comp: *Compilation, decl: *Decl) callconv(.Async) BuildError!void { - if (decl.resolution.start()) |ptr| return ptr.*; - - decl.resolution.data = try generateDecl(comp, decl); - decl.resolution.resolve(); - return decl.resolution.data; - } -}; - -fn parseVisibToken(tree: *ast.Tree, optional_token_index: ?ast.TokenIndex) Visib { - if (optional_token_index) |token_index| { - const token = tree.tokens.at(token_index); - assert(token.id == Token.Id.Keyword_pub); - return Visib.Pub; - } else { - return Visib.Private; - } -} - -/// The function that actually does the generation. -fn generateDecl(comp: *Compilation, decl: *Decl) !void { - switch (decl.id) { - .Var => @panic("TODO"), - .Fn => { - const fn_decl = @fieldParentPtr(Decl.Fn, "base", decl); - return generateDeclFn(comp, fn_decl); - }, - .CompTime => @panic("TODO"), - } -} - -fn generateDeclFn(comp: *Compilation, fn_decl: *Decl.Fn) !void { - const tree_scope = fn_decl.base.tree_scope; - - const body_node = fn_decl.fn_proto.body_node orelse return generateDeclFnProto(comp, fn_decl); - - const fndef_scope = try Scope.FnDef.create(comp, fn_decl.base.parent_scope); - defer fndef_scope.base.deref(comp); - - const fn_type = try analyzeFnType(comp, tree_scope, fn_decl.base.parent_scope, fn_decl.fn_proto); - defer fn_type.base.base.deref(comp); - - var symbol_name = try std.ArrayListSentineled(u8, 0).init(comp.gpa(), fn_decl.base.name); - var symbol_name_consumed = false; - errdefer if (!symbol_name_consumed) symbol_name.deinit(); - - // The Decl.Fn owns the initial 1 reference count - const fn_val = try Value.Fn.create(comp, fn_type, fndef_scope, symbol_name); - fn_decl.value = .{ .Fn = fn_val }; - symbol_name_consumed = true; - - // Define local parameter variables - for (fn_type.key.data.Normal.params) |param, i| { - //AstNode *param_decl_node = get_param_decl_node(fn_table_entry, i); - const param_decl = @fieldParentPtr(ast.Node.ParamDecl, "base", fn_decl.fn_proto.params.at(i).*); - const name_token = param_decl.name_token orelse { - try comp.addCompileError(tree_scope, Span{ - .first = param_decl.firstToken(), - .last = param_decl.type_node.firstToken(), - }, "missing parameter name", .{}); - return error.SemanticAnalysisFailed; - }; - const param_name = tree_scope.tree.tokenSlice(name_token); - - // if (is_noalias && get_codegen_ptr_type(param_type) == nullptr) { - // add_node_error(g, param_decl_node, buf_sprintf("noalias on non-pointer parameter")); - // } - - // TODO check for shadowing - - const var_scope = try Scope.Var.createParam( - comp, - fn_val.child_scope, - param_name, - &param_decl.base, - i, - param.typ, - ); - fn_val.child_scope = &var_scope.base; - - try fn_type.non_key.Normal.variable_list.append(var_scope); - } - - var frame = try comp.gpa().create(@Frame(Compilation.genAndAnalyzeCode)); - defer comp.gpa().destroy(frame); - frame.* = async comp.genAndAnalyzeCode( - tree_scope, - fn_val.child_scope, - body_node, - fn_type.key.data.Normal.return_type, - ); - const analyzed_code = try await frame; - errdefer analyzed_code.destroy(comp.gpa()); - - assert(fn_val.block_scope != null); - - // Kick off rendering to LLVM module, but it doesn't block the fn decl - // analysis from being complete. 
- try comp.prelink_group.call(codegen.renderToLlvm, .{ comp, fn_val, analyzed_code }); - try comp.prelink_group.call(addFnToLinkSet, .{ comp, fn_val }); -} -fn addFnToLinkSet(comp: *Compilation, fn_val: *Value.Fn) callconv(.Async) Compilation.BuildError!void { - fn_val.base.ref(); - defer fn_val.base.deref(comp); - - fn_val.link_set_node.data = fn_val; - - const held = comp.fn_link_set.acquire(); - defer held.release(); - - held.value.append(fn_val.link_set_node); -} - -fn getZigDir(allocator: *mem.Allocator) ![]u8 { - return fs.getAppDataDir(allocator, "zig"); -} - -fn analyzeFnType( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - fn_proto: *ast.Node.FnProto, -) !*Type.Fn { - const return_type_node = switch (fn_proto.return_type) { - .Explicit => |n| n, - .InferErrorSet => |n| n, - }; - const return_type = try comp.analyzeTypeExpr(tree_scope, scope, return_type_node); - return_type.base.deref(comp); - - var params = ArrayList(Type.Fn.Param).init(comp.gpa()); - var params_consumed = false; - defer if (!params_consumed) { - for (params.span()) |param| { - param.typ.base.deref(comp); - } - params.deinit(); - }; - - { - var it = fn_proto.params.iterator(0); - while (it.next()) |param_node_ptr| { - const param_node = param_node_ptr.*.cast(ast.Node.ParamDecl).?; - const param_type = try comp.analyzeTypeExpr(tree_scope, scope, param_node.type_node); - errdefer param_type.base.deref(comp); - try params.append(Type.Fn.Param{ - .typ = param_type, - .is_noalias = param_node.noalias_token != null, - }); - } - } - - const key = Type.Fn.Key{ - .alignment = null, - .data = Type.Fn.Key.Data{ - .Normal = Type.Fn.Key.Normal{ - .return_type = return_type, - .params = params.toOwnedSlice(), - .is_var_args = false, // TODO - .cc = .Unspecified, // TODO - }, - }, - }; - params_consumed = true; - var key_consumed = false; - defer if (!key_consumed) { - for (key.data.Normal.params) |param| { - param.typ.base.deref(comp); - } - comp.gpa().free(key.data.Normal.params); - }; - - const fn_type = try Type.Fn.get(comp, key); - key_consumed = true; - errdefer fn_type.base.base.deref(comp); - - return fn_type; -} - -fn generateDeclFnProto(comp: *Compilation, fn_decl: *Decl.Fn) !void { - const fn_type = try analyzeFnType( - comp, - fn_decl.base.tree_scope, - fn_decl.base.parent_scope, - fn_decl.fn_proto, - ); - defer fn_type.base.base.deref(comp); - - var symbol_name = try std.ArrayListSentineled(u8, 0).init(comp.gpa(), fn_decl.base.name); - var symbol_name_consumed = false; - defer if (!symbol_name_consumed) symbol_name.deinit(); - - // The Decl.Fn owns the initial 1 reference count - const fn_proto_val = try Value.FnProto.create(comp, fn_type, symbol_name); - fn_decl.value = .{ .FnProto = fn_proto_val }; - symbol_name_consumed = true; -} - -pub fn llvmTargetFromTriple(triple: [:0]const u8) !*llvm.Target { - var result: *llvm.Target = undefined; - var err_msg: [*:0]u8 = undefined; - if (llvm.GetTargetFromTriple(triple, &result, &err_msg) != 0) { - std.debug.warn("triple: {s} error: {s}\n", .{ triple, err_msg }); - return error.UnsupportedTarget; - } - return result; -} - -pub fn initializeAllTargets() void { - llvm.InitializeAllTargets(); - llvm.InitializeAllTargetInfos(); - llvm.InitializeAllTargetMCs(); - llvm.InitializeAllAsmPrinters(); - llvm.InitializeAllAsmParsers(); -} - -pub fn getLLVMTriple(allocator: *std.mem.Allocator, target: std.Target) ![:0]u8 { - var result = try std.ArrayListSentineled(u8, 0).initSize(allocator, 0); - defer result.deinit(); - - try result.outStream().print( - 
"{}-unknown-{}-{}", - .{ @tagName(target.cpu.arch), @tagName(target.os.tag), @tagName(target.abi) }, - ); - - return result.toOwnedSlice(); -} diff --git a/src-self-hosted/errmsg.zig b/src-self-hosted/errmsg.zig deleted file mode 100644 index 5775c1df83..0000000000 --- a/src-self-hosted/errmsg.zig +++ /dev/null @@ -1,284 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const fs = std.fs; -const process = std.process; -const Token = std.zig.Token; -const ast = std.zig.ast; -const TokenIndex = std.zig.ast.TokenIndex; -const Compilation = @import("compilation.zig").Compilation; -const Scope = @import("scope.zig").Scope; - -pub const Color = enum { - Auto, - Off, - On, -}; - -pub const Span = struct { - first: ast.TokenIndex, - last: ast.TokenIndex, - - pub fn token(i: TokenIndex) Span { - return Span{ - .first = i, - .last = i, - }; - } - - pub fn node(n: *ast.Node) Span { - return Span{ - .first = n.firstToken(), - .last = n.lastToken(), - }; - } -}; - -pub const Msg = struct { - text: []u8, - realpath: []u8, - data: Data, - - const Data = union(enum) { - Cli: Cli, - PathAndTree: PathAndTree, - ScopeAndComp: ScopeAndComp, - }; - - const PathAndTree = struct { - span: Span, - tree: *ast.Tree, - allocator: *mem.Allocator, - }; - - const ScopeAndComp = struct { - span: Span, - tree_scope: *Scope.AstTree, - compilation: *Compilation, - }; - - const Cli = struct { - allocator: *mem.Allocator, - }; - - pub fn destroy(self: *Msg) void { - switch (self.data) { - .Cli => |cli| { - cli.allocator.free(self.text); - cli.allocator.free(self.realpath); - cli.allocator.destroy(self); - }, - .PathAndTree => |path_and_tree| { - path_and_tree.allocator.free(self.text); - path_and_tree.allocator.free(self.realpath); - path_and_tree.allocator.destroy(self); - }, - .ScopeAndComp => |scope_and_comp| { - scope_and_comp.tree_scope.base.deref(scope_and_comp.compilation); - scope_and_comp.compilation.gpa().free(self.text); - scope_and_comp.compilation.gpa().free(self.realpath); - scope_and_comp.compilation.gpa().destroy(self); - }, - } - } - - fn getAllocator(self: *const Msg) *mem.Allocator { - switch (self.data) { - .Cli => |cli| return cli.allocator, - .PathAndTree => |path_and_tree| { - return path_and_tree.allocator; - }, - .ScopeAndComp => |scope_and_comp| { - return scope_and_comp.compilation.gpa(); - }, - } - } - - pub fn getTree(self: *const Msg) *ast.Tree { - switch (self.data) { - .Cli => unreachable, - .PathAndTree => |path_and_tree| { - return path_and_tree.tree; - }, - .ScopeAndComp => |scope_and_comp| { - return scope_and_comp.tree_scope.tree; - }, - } - } - - pub fn getSpan(self: *const Msg) Span { - return switch (self.data) { - .Cli => unreachable, - .PathAndTree => |path_and_tree| path_and_tree.span, - .ScopeAndComp => |scope_and_comp| scope_and_comp.span, - }; - } - - /// Takes ownership of text - /// References tree_scope, and derefs when the msg is freed - pub fn createFromScope(comp: *Compilation, tree_scope: *Scope.AstTree, span: Span, text: []u8) !*Msg { - const realpath = try mem.dupe(comp.gpa(), u8, tree_scope.root().realpath); - errdefer comp.gpa().free(realpath); - - const msg = try comp.gpa().create(Msg); - msg.* = Msg{ - .text = text, - .realpath = realpath, - .data = Data{ - .ScopeAndComp = ScopeAndComp{ - .tree_scope = tree_scope, - .compilation = comp, - .span = span, - }, - }, - }; - tree_scope.base.ref(); - return msg; - } - - /// Caller owns returned Msg and must free with `allocator` - /// allocator will additionally be used for printing messages later. 
- pub fn createFromCli(comp: *Compilation, realpath: []const u8, text: []u8) !*Msg { - const realpath_copy = try mem.dupe(comp.gpa(), u8, realpath); - errdefer comp.gpa().free(realpath_copy); - - const msg = try comp.gpa().create(Msg); - msg.* = Msg{ - .text = text, - .realpath = realpath_copy, - .data = Data{ - .Cli = Cli{ .allocator = comp.gpa() }, - }, - }; - return msg; - } - - pub fn createFromParseErrorAndScope( - comp: *Compilation, - tree_scope: *Scope.AstTree, - parse_error: *const ast.Error, - ) !*Msg { - const loc_token = parse_error.loc(); - var text_buf = std.ArrayList(u8).init(comp.gpa()); - defer text_buf.deinit(); - - const realpath_copy = try mem.dupe(comp.gpa(), u8, tree_scope.root().realpath); - errdefer comp.gpa().free(realpath_copy); - - try parse_error.render(&tree_scope.tree.tokens, text_buf.outStream()); - - const msg = try comp.gpa().create(Msg); - msg.* = Msg{ - .text = undefined, - .realpath = realpath_copy, - .data = Data{ - .ScopeAndComp = ScopeAndComp{ - .tree_scope = tree_scope, - .compilation = comp, - .span = Span{ - .first = loc_token, - .last = loc_token, - }, - }, - }, - }; - tree_scope.base.ref(); - msg.text = text_buf.toOwnedSlice(); - return msg; - } - - /// `realpath` must outlive the returned Msg - /// `tree` must outlive the returned Msg - /// Caller owns returned Msg and must free with `allocator` - /// allocator will additionally be used for printing messages later. - pub fn createFromParseError( - allocator: *mem.Allocator, - parse_error: *const ast.Error, - tree: *ast.Tree, - realpath: []const u8, - ) !*Msg { - const loc_token = parse_error.loc(); - var text_buf = std.ArrayList(u8).init(allocator); - defer text_buf.deinit(); - - const realpath_copy = try mem.dupe(allocator, u8, realpath); - errdefer allocator.free(realpath_copy); - - try parse_error.render(&tree.tokens, text_buf.outStream()); - - const msg = try allocator.create(Msg); - msg.* = Msg{ - .text = undefined, - .realpath = realpath_copy, - .data = Data{ - .PathAndTree = PathAndTree{ - .allocator = allocator, - .tree = tree, - .span = Span{ - .first = loc_token, - .last = loc_token, - }, - }, - }, - }; - msg.text = text_buf.toOwnedSlice(); - errdefer allocator.destroy(msg); - - return msg; - } - - pub fn printToStream(msg: *const Msg, stream: var, color_on: bool) !void { - switch (msg.data) { - .Cli => { - try stream.print("{}:-:-: error: {}\n", .{ msg.realpath, msg.text }); - return; - }, - else => {}, - } - - const allocator = msg.getAllocator(); - const tree = msg.getTree(); - - const cwd = try process.getCwdAlloc(allocator); - defer allocator.free(cwd); - - const relpath = try fs.path.relative(allocator, cwd, msg.realpath); - defer allocator.free(relpath); - - const path = if (relpath.len < msg.realpath.len) relpath else msg.realpath; - const span = msg.getSpan(); - - const first_token = tree.tokens.at(span.first); - const last_token = tree.tokens.at(span.last); - const start_loc = tree.tokenLocationPtr(0, first_token); - const end_loc = tree.tokenLocationPtr(first_token.end, last_token); - if (!color_on) { - try stream.print("{}:{}:{}: error: {}\n", .{ - path, - start_loc.line + 1, - start_loc.column + 1, - msg.text, - }); - return; - } - - try stream.print("{}:{}:{}: error: {}\n{}\n", .{ - path, - start_loc.line + 1, - start_loc.column + 1, - msg.text, - tree.source[start_loc.line_start..start_loc.line_end], - }); - try stream.writeByteNTimes(' ', start_loc.column); - try stream.writeByteNTimes('~', last_token.end - first_token.start); - try stream.writeAll("\n"); - } - - pub fn 
printToFile(msg: *const Msg, file: fs.File, color: Color) !void { - const color_on = switch (color) { - .Auto => file.isTty(), - .On => true, - .Off => false, - }; - return msg.printToStream(file.outStream(), color_on); - } -}; diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 92a5aa7fdf..cf2b65c719 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -922,7 +922,6 @@ pub const Module = struct { if (self.decl_table.get(hash)) |kv| { return kv.value; } else { - std.debug.warn("creating new decl for {}\n", .{old_inst.name}); const new_decl = blk: { try self.decl_table.ensureCapacity(self.decl_table.size + 1); const new_decl = try self.allocator.create(Decl); @@ -2161,101 +2160,3 @@ pub const ErrorMsg = struct { self.* = undefined; } }; - -pub fn main() anyerror!void { - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - const allocator = if (std.builtin.link_libc) std.heap.c_allocator else &arena.allocator; - - const args = try std.process.argsAlloc(allocator); - defer std.process.argsFree(allocator, args); - - const src_path = args[1]; - const bin_path = args[2]; - const debug_error_trace = false; - const output_zir = false; - const object_format: ?std.builtin.ObjectFormat = null; - - const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); - - var bin_file = try link.openBinFilePath(allocator, std.fs.cwd(), bin_path, .{ - .target = native_info.target, - .output_mode = .Exe, - .link_mode = .Static, - .object_format = object_format orelse native_info.target.getObjectFormat(), - }); - defer bin_file.deinit(); - - var module = blk: { - const root_pkg = try Package.create(allocator, std.fs.cwd(), ".", src_path); - errdefer root_pkg.destroy(); - - const root_scope = try allocator.create(Module.Scope.ZIRModule); - errdefer allocator.destroy(root_scope); - root_scope.* = .{ - .sub_file_path = root_pkg.root_src_path, - .source = .{ .unloaded = {} }, - .contents = .{ .not_available = {} }, - .status = .never_loaded, - }; - - break :blk Module{ - .allocator = allocator, - .root_pkg = root_pkg, - .root_scope = root_scope, - .bin_file = &bin_file, - .optimize_mode = .Debug, - .decl_table = std.AutoHashMap(Module.Decl.Hash, *Module.Decl).init(allocator), - .decl_exports = std.AutoHashMap(*Module.Decl, []*Module.Export).init(allocator), - .export_owners = std.AutoHashMap(*Module.Decl, []*Module.Export).init(allocator), - .failed_decls = std.AutoHashMap(*Module.Decl, *ErrorMsg).init(allocator), - .failed_files = std.AutoHashMap(*Module.Scope.ZIRModule, *ErrorMsg).init(allocator), - .failed_exports = std.AutoHashMap(*Module.Export, *ErrorMsg).init(allocator), - .work_queue = std.fifo.LinearFifo(Module.WorkItem, .Dynamic).init(allocator), - }; - }; - defer module.deinit(); - - const stdin = std.io.getStdIn().inStream(); - const stderr = std.io.getStdErr().outStream(); - var repl_buf: [1024]u8 = undefined; - - while (true) { - try module.update(); - - var errors = try module.getAllErrorsAlloc(); - defer errors.deinit(allocator); - - if (errors.list.len != 0) { - for (errors.list) |full_err_msg| { - std.debug.warn("{}:{}:{}: error: {}\n", .{ - full_err_msg.src_path, - full_err_msg.line + 1, - full_err_msg.column + 1, - full_err_msg.msg, - }); - } - if (debug_error_trace) return error.AnalysisFail; - } - - try stderr.print("🦎 ", .{}); - if (try stdin.readUntilDelimiterOrEof(&repl_buf, '\n')) |line| { - if (mem.eql(u8, line, "update")) { - continue; - } else { - try stderr.print("unknown command: {}\n", .{line}); - 
} - } else { - break; - } - } - - if (output_zir) { - var new_zir_module = try text.emit_zir(allocator, module); - defer new_zir_module.deinit(allocator); - - var bos = std.io.bufferedOutStream(std.io.getStdOut().outStream()); - try new_zir_module.writeToStream(allocator, bos.outStream()); - try bos.flush(); - } -} diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index f283fb5410..e0cc4e122c 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -20,7 +20,7 @@ pub const Inst = struct { name: []const u8, /// Slice into the source of the part after the = and before the next instruction. - contents: []const u8, + contents: []const u8 = &[0]u8{}, /// These names are used directly as the instruction names in the text format. pub const Tag = enum { @@ -825,7 +825,6 @@ const Parser = struct { .name = inst_name, .src = self.i, .tag = InstType.base_tag, - .contents = undefined, }; if (@hasField(InstType, "ty")) { @@ -960,7 +959,6 @@ const Parser = struct { .name = try self.generateName(), .src = src, .tag = Inst.Str.base_tag, - .contents = undefined, }, .positionals = .{ .bytes = ident }, .kw_args = .{}, @@ -971,7 +969,6 @@ const Parser = struct { .name = try self.generateName(), .src = src, .tag = Inst.DeclRef.base_tag, - .contents = undefined, }, .positionals = .{ .name = &name.base }, .kw_args = .{}, diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index b7535fca6f..9215e37e8c 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -1,29 +1,30 @@ const std = @import("std"); -const builtin = @import("builtin"); - -const event = std.event; -const os = std.os; const io = std.io; const fs = std.fs; const mem = std.mem; const process = std.process; const Allocator = mem.Allocator; const ArrayList = std.ArrayList; +const ast = std.zig.ast; +const ir = @import("ir.zig"); +const link = @import("link.zig"); +const Package = @import("Package.zig"); -const c = @import("c.zig"); -const introspect = @import("introspect.zig"); -const ZigCompiler = @import("compilation.zig").ZigCompiler; -const Compilation = @import("compilation.zig").Compilation; -const Target = std.Target; -const errmsg = @import("errmsg.zig"); const LibCInstallation = @import("libc_installation.zig").LibCInstallation; -pub const io_mode = .evented; +// TODO Improve async I/O enough that we feel comfortable doing this. 
+//pub const io_mode = .evented; pub const max_src_size = 2 * 1024 * 1024 * 1024; // 2 GiB +pub const Color = enum { + Auto, + Off, + On, +}; + const usage = - \\usage: zig [command] [options] + \\Usage: zig [command] [options] \\ \\Commands: \\ @@ -39,175 +40,154 @@ const usage = \\ ; -const Command = struct { - name: []const u8, - exec: async fn (*Allocator, []const []const u8) anyerror!void, -}; - pub fn main() !void { - const allocator = std.heap.c_allocator; + // TODO general purpose allocator in the zig std lib + const gpa = if (std.builtin.link_libc) std.heap.c_allocator else std.heap.page_allocator; + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = &arena_instance.allocator; - const stderr = io.getStdErr().outStream(); - - const args = try process.argsAlloc(allocator); - defer process.argsFree(allocator, args); + const args = try process.argsAlloc(arena); if (args.len <= 1) { - try stderr.writeAll("expected command argument\n\n"); - try stderr.writeAll(usage); + std.debug.warn("expected command argument\n\n{}", .{usage}); process.exit(1); } const cmd = args[1]; const cmd_args = args[2..]; if (mem.eql(u8, cmd, "build-exe")) { - return buildOutputType(allocator, cmd_args, .Exe); + return buildOutputType(gpa, arena, cmd_args, .Exe); } else if (mem.eql(u8, cmd, "build-lib")) { - return buildOutputType(allocator, cmd_args, .Lib); + return buildOutputType(gpa, arena, cmd_args, .Lib); } else if (mem.eql(u8, cmd, "build-obj")) { - return buildOutputType(allocator, cmd_args, .Obj); + return buildOutputType(gpa, arena, cmd_args, .Obj); } else if (mem.eql(u8, cmd, "fmt")) { - return cmdFmt(allocator, cmd_args); + return cmdFmt(gpa, cmd_args); } else if (mem.eql(u8, cmd, "libc")) { - return cmdLibC(allocator, cmd_args); + return cmdLibC(gpa, cmd_args); } else if (mem.eql(u8, cmd, "targets")) { - const info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); + const info = try std.zig.system.NativeTargetInfo.detect(arena, .{}); const stdout = io.getStdOut().outStream(); - return @import("print_targets.zig").cmdTargets(allocator, cmd_args, stdout, info.target); + return @import("print_targets.zig").cmdTargets(arena, cmd_args, stdout, info.target); } else if (mem.eql(u8, cmd, "version")) { - return cmdVersion(allocator, cmd_args); + // Need to set up the build script to give the version as a comptime value. 
+ std.debug.warn("TODO version command not implemented yet\n", .{}); + return error.Unimplemented; } else if (mem.eql(u8, cmd, "zen")) { - return cmdZen(allocator, cmd_args); + try io.getStdOut().writeAll(info_zen); } else if (mem.eql(u8, cmd, "help")) { - return cmdHelp(allocator, cmd_args); - } else if (mem.eql(u8, cmd, "internal")) { - return cmdInternal(allocator, cmd_args); + try io.getStdOut().writeAll(usage); } else { - try stderr.print("unknown command: {}\n\n", .{args[1]}); - try stderr.writeAll(usage); + std.debug.warn("unknown command: {}\n\n{}", .{ args[1], usage }); process.exit(1); } } const usage_build_generic = - \\usage: zig build-exe [file] - \\ zig build-lib [file] - \\ zig build-obj [file] + \\Usage: zig build-exe [files] + \\ zig build-lib [files] + \\ zig build-obj [files] + \\ + \\Supported file types: + \\ (planned) .zig Zig source code + \\ .zir Zig Intermediate Representation code + \\ (planned) .o ELF object file + \\ (planned) .o MACH-O (macOS) object file + \\ (planned) .obj COFF (Windows) object file + \\ (planned) .lib COFF (Windows) static library + \\ (planned) .a ELF static library + \\ (planned) .so ELF shared object (dynamic link) + \\ (planned) .dll Windows Dynamic Link Library + \\ (planned) .dylib MACH-O (macOS) dynamic library + \\ (planned) .s Target-specific assembly source code + \\ (planned) .S Assembly with C preprocessor (requires LLVM extensions) + \\ (planned) .c C source code (requires LLVM extensions) + \\ (planned) .cpp C++ source code (requires LLVM extensions) + \\ Other C++ extensions: .C .cc .cxx \\ \\General Options: - \\ --help Print this help and exit - \\ --color [auto|off|on] Enable or disable colored error messages + \\ -h, --help Print this help and exit + \\ --watch Enable compiler REPL + \\ --color [auto|off|on] Enable or disable colored error messages + \\ -femit-bin[=path] (default) output machine code + \\ -fno-emit-bin Do not output machine code \\ \\Compile Options: - \\ --libc [file] Provide a file which specifies libc paths - \\ --assembly [source] Add assembly file to build - \\ --emit [filetype] Emit a specific file format as compilation output - \\ --enable-timing-info Print timing diagnostics - \\ --name [name] Override output name - \\ --output [file] Override destination path - \\ --output-h [file] Override generated header file path - \\ --pkg-begin [name] [path] Make package available to import and push current pkg - \\ --pkg-end Pop current pkg - \\ --mode [mode] Set the build mode - \\ debug (default) optimizations off, safety on - \\ release-fast optimizations on, safety off - \\ release-safe optimizations on, safety on - \\ release-small optimize for small binary, safety off - \\ --static Output will be statically linked - \\ --strip Exclude debug symbols - \\ -target [name] -- see the targets command - \\ --eh-frame-hdr enable C++ exception handling by passing --eh-frame-hdr to linker - \\ --verbose-tokenize Turn on compiler debug output for tokenization - \\ --verbose-ast-tree Turn on compiler debug output for parsing into an AST (tree view) - \\ --verbose-ast-fmt Turn on compiler debug output for parsing into an AST (render source) - \\ --verbose-link Turn on compiler debug output for linking - \\ --verbose-ir Turn on compiler debug output for Zig IR - \\ --verbose-llvm-ir Turn on compiler debug output for LLVM IR - \\ --verbose-cimport Turn on compiler debug output for C imports - \\ -dirafter [dir] Same as -isystem but do it last - \\ -isystem [dir] Add additional search path for other .h files - \\ 
-mllvm [arg] Additional arguments to forward to LLVM's option processing + \\ -target [name] -- see the targets command + \\ -mcpu [cpu] Specify target CPU and feature set + \\ --name [name] Override output name + \\ --mode [mode] Set the build mode + \\ Debug (default) optimizations off, safety on + \\ ReleaseFast optimizations on, safety off + \\ ReleaseSafe optimizations on, safety on + \\ ReleaseSmall optimize for small binary, safety off + \\ --dynamic Force output to be dynamically linked + \\ --strip Exclude debug symbols \\ \\Link Options: - \\ --ar-path [path] Set the path to ar - \\ --each-lib-rpath Add rpath for each used dynamic library - \\ --library [lib] Link against lib - \\ --forbid-library [lib] Make it an error to link against lib - \\ --library-path [dir] Add a directory to the library search path - \\ --linker-script [path] Use a custom linker script - \\ --object [obj] Add object file to build - \\ -rdynamic Add all symbols to the dynamic symbol table - \\ -rpath [path] Add directory to the runtime library search path - \\ -framework [name] (darwin) link against framework - \\ -mios-version-min [ver] (darwin) set iOS deployment target - \\ -mmacosx-version-min [ver] (darwin) set Mac OS X deployment target - \\ --ver-major [ver] Dynamic library semver major version - \\ --ver-minor [ver] Dynamic library semver minor version - \\ --ver-patch [ver] Dynamic library semver patch version + \\ -l[lib], --library [lib] Link against system library + \\ --dynamic-linker [path] Set the dynamic interpreter path (usually ld.so) + \\ --version [ver] Dynamic library semver \\ + \\Debug Options (Zig Compiler Development): + \\ -ftime-report Print timing diagnostics + \\ --debug-tokenize verbose tokenization + \\ --debug-ast-tree verbose parsing into an AST (tree view) + \\ --debug-ast-fmt verbose parsing into an AST (render source) + \\ --debug-ir verbose Zig IR + \\ --debug-link verbose linking + \\ --debug-codegen verbose machine code generation \\ ; -fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Compilation.Kind) !void { - const stderr = io.getStdErr().outStream(); +const Emit = union(enum) { + no, + yes_default_path, + yes: []const u8, +}; - var color: errmsg.Color = .Auto; +fn buildOutputType( + gpa: *Allocator, + arena: *Allocator, + args: []const []const u8, + output_mode: std.builtin.OutputMode, +) !void { + var color: Color = .Auto; var build_mode: std.builtin.Mode = .Debug; - var emit_bin = true; - var emit_asm = false; - var emit_llvm_ir = false; - var emit_h = false; var provided_name: ?[]const u8 = null; var is_dynamic = false; var root_src_file: ?[]const u8 = null; - var libc_arg: ?[]const u8 = null; var version: std.builtin.Version = .{ .major = 0, .minor = 0, .patch = 0 }; - var linker_script: ?[]const u8 = null; var strip = false; - var verbose_tokenize = false; - var verbose_ast_tree = false; - var verbose_ast_fmt = false; - var verbose_link = false; - var verbose_ir = false; - var verbose_llvm_ir = false; - var verbose_cimport = false; - var linker_rdynamic = false; - var link_eh_frame_hdr = false; - var macosx_version_min: ?[]const u8 = null; - var ios_version_min: ?[]const u8 = null; + var watch = false; + var debug_tokenize = false; + var debug_ast_tree = false; + var debug_ast_fmt = false; + var debug_link = false; + var debug_ir = false; + var debug_codegen = false; + var time_report = false; + var emit_bin: Emit = .yes_default_path; + var emit_zir: Emit = .no; + var target_arch_os_abi: []const u8 = "native"; + var target_mcpu: 
?[]const u8 = null; + var target_dynamic_linker: ?[]const u8 = null; - var assembly_files = ArrayList([]const u8).init(allocator); - defer assembly_files.deinit(); - - var link_objects = ArrayList([]const u8).init(allocator); - defer link_objects.deinit(); - - var clang_argv_buf = ArrayList([]const u8).init(allocator); - defer clang_argv_buf.deinit(); - - var mllvm_flags = ArrayList([]const u8).init(allocator); - defer mllvm_flags.deinit(); - - var cur_pkg = try CliPkg.init(allocator, "", "", null); - defer cur_pkg.deinit(); - - var system_libs = ArrayList([]const u8).init(allocator); + var system_libs = std.ArrayList([]const u8).init(gpa); defer system_libs.deinit(); - var c_src_files = ArrayList([]const u8).init(allocator); - defer c_src_files.deinit(); - { var i: usize = 0; while (i < args.len) : (i += 1) { const arg = args[i]; if (mem.startsWith(u8, arg, "-")) { - if (mem.eql(u8, arg, "--help")) { + if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) { try io.getStdOut().writeAll(usage_build_generic); process.exit(0); } else if (mem.eql(u8, arg, "--color")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected [auto|on|off] after --color\n"); + std.debug.warn("expected [auto|on|off] after --color\n", .{}); process.exit(1); } i += 1; @@ -219,12 +199,12 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Co } else if (mem.eql(u8, next_arg, "off")) { color = .Off; } else { - try stderr.print("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); + std.debug.warn("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); process.exit(1); } } else if (mem.eql(u8, arg, "--mode")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode\n"); + std.debug.warn("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode\n", .{}); process.exit(1); } i += 1; @@ -238,289 +218,317 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Co } else if (mem.eql(u8, next_arg, "ReleaseSmall")) { build_mode = .ReleaseSmall; } else { - try stderr.print("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode, found '{}'\n", .{next_arg}); + std.debug.warn("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode, found '{}'\n", .{next_arg}); process.exit(1); } } else if (mem.eql(u8, arg, "--name")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --name\n"); + std.debug.warn("expected parameter after --name\n", .{}); process.exit(1); } i += 1; provided_name = args[i]; - } else if (mem.eql(u8, arg, "--ver-major")) { + } else if (mem.eql(u8, arg, "--library")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --ver-major\n"); + std.debug.warn("expected parameter after --library\n", .{}); process.exit(1); } i += 1; - version.major = try std.fmt.parseInt(u32, args[i], 10); - } else if (mem.eql(u8, arg, "--ver-minor")) { + try system_libs.append(args[i]); + } else if (mem.eql(u8, arg, "--version")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --ver-minor\n"); + std.debug.warn("expected parameter after --version\n", .{}); process.exit(1); } i += 1; - version.minor = try std.fmt.parseInt(u32, args[i], 10); - } else if (mem.eql(u8, arg, "--ver-patch")) { + version = std.builtin.Version.parse(args[i]) catch |err| { + std.debug.warn("unable to parse --version '{}': {}\n", .{ args[i], @errorName(err) }); + process.exit(1); + }; + } else if (mem.eql(u8, arg, 
"-target")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --ver-patch\n"); + std.debug.warn("expected parameter after -target\n", .{}); process.exit(1); } i += 1; - version.patch = try std.fmt.parseInt(u32, args[i], 10); - } else if (mem.eql(u8, arg, "--linker-script")) { + target_arch_os_abi = args[i]; + } else if (mem.eql(u8, arg, "-mcpu")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --linker-script\n"); + std.debug.warn("expected parameter after -mcpu\n", .{}); process.exit(1); } i += 1; - linker_script = args[i]; - } else if (mem.eql(u8, arg, "--libc")) { + target_mcpu = args[i]; + } else if (mem.startsWith(u8, arg, "-mcpu=")) { + target_mcpu = arg["-mcpu=".len..]; + } else if (mem.eql(u8, arg, "--dynamic-linker")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --libc\n"); + std.debug.warn("expected parameter after --dynamic-linker\n", .{}); process.exit(1); } i += 1; - libc_arg = args[i]; - } else if (mem.eql(u8, arg, "-mllvm")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after -mllvm\n"); - process.exit(1); - } - i += 1; - try clang_argv_buf.append("-mllvm"); - try clang_argv_buf.append(args[i]); - - try mllvm_flags.append(args[i]); - } else if (mem.eql(u8, arg, "-mmacosx-version-min")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after -mmacosx-version-min\n"); - process.exit(1); - } - i += 1; - macosx_version_min = args[i]; - } else if (mem.eql(u8, arg, "-mios-version-min")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after -mios-version-min\n"); - process.exit(1); - } - i += 1; - ios_version_min = args[i]; + target_dynamic_linker = args[i]; + } else if (mem.eql(u8, arg, "--watch")) { + watch = true; + } else if (mem.eql(u8, arg, "-ftime-report")) { + time_report = true; } else if (mem.eql(u8, arg, "-femit-bin")) { - emit_bin = true; + emit_bin = .yes_default_path; + } else if (mem.startsWith(u8, arg, "-femit-bin=")) { + emit_bin = .{ .yes = arg["-femit-bin=".len..] }; } else if (mem.eql(u8, arg, "-fno-emit-bin")) { - emit_bin = false; - } else if (mem.eql(u8, arg, "-femit-asm")) { - emit_asm = true; - } else if (mem.eql(u8, arg, "-fno-emit-asm")) { - emit_asm = false; - } else if (mem.eql(u8, arg, "-femit-llvm-ir")) { - emit_llvm_ir = true; - } else if (mem.eql(u8, arg, "-fno-emit-llvm-ir")) { - emit_llvm_ir = false; + emit_bin = .no; + } else if (mem.eql(u8, arg, "-femit-zir")) { + emit_zir = .yes_default_path; + } else if (mem.startsWith(u8, arg, "-femit-zir=")) { + emit_zir = .{ .yes = arg["-femit-zir=".len..] 
}; + } else if (mem.eql(u8, arg, "-fno-emit-zir")) { + emit_zir = .no; } else if (mem.eql(u8, arg, "-dynamic")) { is_dynamic = true; } else if (mem.eql(u8, arg, "--strip")) { strip = true; - } else if (mem.eql(u8, arg, "--verbose-tokenize")) { - verbose_tokenize = true; - } else if (mem.eql(u8, arg, "--verbose-ast-tree")) { - verbose_ast_tree = true; - } else if (mem.eql(u8, arg, "--verbose-ast-fmt")) { - verbose_ast_fmt = true; - } else if (mem.eql(u8, arg, "--verbose-link")) { - verbose_link = true; - } else if (mem.eql(u8, arg, "--verbose-ir")) { - verbose_ir = true; - } else if (mem.eql(u8, arg, "--verbose-llvm-ir")) { - verbose_llvm_ir = true; - } else if (mem.eql(u8, arg, "--eh-frame-hdr")) { - link_eh_frame_hdr = true; - } else if (mem.eql(u8, arg, "--verbose-cimport")) { - verbose_cimport = true; - } else if (mem.eql(u8, arg, "-rdynamic")) { - linker_rdynamic = true; - } else if (mem.eql(u8, arg, "--pkg-begin")) { - if (i + 2 >= args.len) { - try stderr.writeAll("expected [name] [path] after --pkg-begin\n"); - process.exit(1); - } - i += 1; - const new_pkg_name = args[i]; - i += 1; - const new_pkg_path = args[i]; - - var new_cur_pkg = try CliPkg.init(allocator, new_pkg_name, new_pkg_path, cur_pkg); - try cur_pkg.children.append(new_cur_pkg); - cur_pkg = new_cur_pkg; - } else if (mem.eql(u8, arg, "--pkg-end")) { - if (cur_pkg.parent) |parent| { - cur_pkg = parent; - } else { - try stderr.writeAll("encountered --pkg-end with no matching --pkg-begin\n"); - process.exit(1); - } + } else if (mem.eql(u8, arg, "--debug-tokenize")) { + debug_tokenize = true; + } else if (mem.eql(u8, arg, "--debug-ast-tree")) { + debug_ast_tree = true; + } else if (mem.eql(u8, arg, "--debug-ast-fmt")) { + debug_ast_fmt = true; + } else if (mem.eql(u8, arg, "--debug-link")) { + debug_link = true; + } else if (mem.eql(u8, arg, "--debug-ir")) { + debug_ir = true; + } else if (mem.eql(u8, arg, "--debug-codegen")) { + debug_codegen = true; } else if (mem.startsWith(u8, arg, "-l")) { try system_libs.append(arg[2..]); } else { - try stderr.print("unrecognized parameter: '{}'", .{arg}); + std.debug.warn("unrecognized parameter: '{}'", .{arg}); process.exit(1); } - } else if (mem.endsWith(u8, arg, ".s")) { - try assembly_files.append(arg); + } else if (mem.endsWith(u8, arg, ".s") or mem.endsWith(u8, arg, ".S")) { + std.debug.warn("assembly files not supported yet", .{}); + process.exit(1); } else if (mem.endsWith(u8, arg, ".o") or mem.endsWith(u8, arg, ".obj") or mem.endsWith(u8, arg, ".a") or mem.endsWith(u8, arg, ".lib")) { - try link_objects.append(arg); + std.debug.warn("object files and static libraries not supported yet", .{}); + process.exit(1); } else if (mem.endsWith(u8, arg, ".c") or mem.endsWith(u8, arg, ".cpp")) { - try c_src_files.append(arg); - } else if (mem.endsWith(u8, arg, ".zig")) { + std.debug.warn("compilation of C and C++ source code requires LLVM extensions which are not implemented yet", .{}); + process.exit(1); + } else if (mem.endsWith(u8, arg, ".so") or + mem.endsWith(u8, arg, ".dylib") or + mem.endsWith(u8, arg, ".dll")) + { + std.debug.warn("linking against dynamic libraries not yet supported", .{}); + process.exit(1); + } else if (mem.endsWith(u8, arg, ".zig") or mem.endsWith(u8, arg, ".zir")) { if (root_src_file) |other| { - try stderr.print("found another zig file '{}' after root source file '{}'", .{ - arg, - other, - }); + std.debug.warn("found another zig file '{}' after root source file '{}'", .{ arg, other }); process.exit(1); } else { root_src_file = arg; } } else { - try 
stderr.print("unrecognized file extension of parameter '{}'", .{arg}); + std.debug.warn("unrecognized file extension of parameter '{}'", .{arg}); } } } - if (cur_pkg.parent != null) { - try stderr.print("unmatched --pkg-begin\n", .{}); - process.exit(1); - } - const root_name = if (provided_name) |n| n else blk: { if (root_src_file) |file| { const basename = fs.path.basename(file); var it = mem.split(basename, "."); break :blk it.next() orelse basename; } else { - try stderr.writeAll("--name [name] not provided and unable to infer\n"); + std.debug.warn("--name [name] not provided and unable to infer\n", .{}); process.exit(1); } }; - if (root_src_file == null and link_objects.len == 0 and assembly_files.len == 0) { - try stderr.writeAll("Expected source file argument or at least one --object or --assembly argument\n"); + if (system_libs.items.len != 0) { + std.debug.warn("linking against system libraries not yet supported", .{}); process.exit(1); } - if (out_type == Compilation.Kind.Obj and link_objects.len != 0) { - try stderr.writeAll("When building an object file, --object arguments are invalid\n"); + var diags: std.zig.CrossTarget.ParseOptions.Diagnostics = .{}; + const cross_target = std.zig.CrossTarget.parse(.{ + .arch_os_abi = target_arch_os_abi, + .cpu_features = target_mcpu, + .dynamic_linker = target_dynamic_linker, + .diagnostics = &diags, + }) catch |err| switch (err) { + error.UnknownCpuModel => { + std.debug.warn("Unknown CPU: '{}'\nAvailable CPUs for architecture '{}':\n", .{ + diags.cpu_name.?, + @tagName(diags.arch.?), + }); + for (diags.arch.?.allCpuModels()) |cpu| { + std.debug.warn(" {}\n", .{cpu.name}); + } + process.exit(1); + }, + error.UnknownCpuFeature => { + std.debug.warn( + \\Unknown CPU feature: '{}' + \\Available CPU features for architecture '{}': + \\ + , .{ + diags.unknown_feature_name, + @tagName(diags.arch.?), + }); + for (diags.arch.?.allFeaturesList()) |feature| { + std.debug.warn(" {}: {}\n", .{ feature.name, feature.description }); + } + process.exit(1); + }, + else => |e| return e, + }; + + const object_format: ?std.builtin.ObjectFormat = null; + var target_info = try std.zig.system.NativeTargetInfo.detect(gpa, cross_target); + if (target_info.cpu_detection_unimplemented) { + // TODO We want to just use detected_info.target but implementing + // CPU model & feature detection is todo so here we rely on LLVM. 
+ std.debug.warn("CPU features detection is not yet available for this system without LLVM extensions\n", .{}); process.exit(1); } - try ZigCompiler.setLlvmArgv(allocator, mllvm_flags.span()); - - const zig_lib_dir = introspect.resolveZigLibDir(allocator) catch process.exit(1); - defer allocator.free(zig_lib_dir); - - var override_libc: LibCInstallation = undefined; - - var zig_compiler = try ZigCompiler.init(allocator); - defer zig_compiler.deinit(); - - var comp = try Compilation.create( - &zig_compiler, - root_name, - root_src_file, - .{}, - out_type, - build_mode, - !is_dynamic, - zig_lib_dir, - ); - defer comp.destroy(); - - if (libc_arg) |libc_path| { - parseLibcPaths(allocator, &override_libc, libc_path); - comp.override_libc = &override_libc; - } - - for (system_libs.span()) |lib| { - _ = try comp.addLinkLib(lib, true); - } - - comp.version = version; - comp.is_test = false; - comp.linker_script = linker_script; - comp.clang_argv = clang_argv_buf.span(); - comp.strip = strip; - - comp.verbose_tokenize = verbose_tokenize; - comp.verbose_ast_tree = verbose_ast_tree; - comp.verbose_ast_fmt = verbose_ast_fmt; - comp.verbose_link = verbose_link; - comp.verbose_ir = verbose_ir; - comp.verbose_llvm_ir = verbose_llvm_ir; - comp.verbose_cimport = verbose_cimport; - - comp.link_eh_frame_hdr = link_eh_frame_hdr; - - comp.err_color = color; - - comp.linker_rdynamic = linker_rdynamic; - - if (macosx_version_min != null and ios_version_min != null) { - try stderr.writeAll("-mmacosx-version-min and -mios-version-min options not allowed together\n"); + const src_path = root_src_file orelse { + std.debug.warn("expected at least one file argument", .{}); process.exit(1); - } + }; - if (macosx_version_min) |ver| { - comp.darwin_version_min = Compilation.DarwinVersionMin{ .MacOS = ver }; - } - if (ios_version_min) |ver| { - comp.darwin_version_min = Compilation.DarwinVersionMin{ .Ios = ver }; - } + const bin_path = switch (emit_bin) { + .no => { + std.debug.warn("-fno-emit-bin not supported yet", .{}); + process.exit(1); + }, + .yes_default_path => try std.fmt.allocPrint(arena, "{}{}", .{ root_name, target_info.target.exeFileExt() }), + .yes => |p| p, + }; - comp.emit_bin = emit_bin; - comp.emit_asm = emit_asm; - comp.emit_llvm_ir = emit_llvm_ir; - comp.emit_h = emit_h; - comp.assembly_files = assembly_files.span(); - comp.link_objects = link_objects.span(); - - comp.start(); - processBuildEvents(comp, color); -} - -fn processBuildEvents(comp: *Compilation, color: errmsg.Color) void { - const stderr_file = io.getStdErr(); - const stderr = stderr_file.outStream(); - var count: usize = 0; - while (!comp.cancelled) { - const build_event = comp.events.get(); - count += 1; - - switch (build_event) { - .Ok => { - stderr.print("Build {} succeeded\n", .{count}) catch process.exit(1); - }, - .Error => |err| { - stderr.print("Build {} failed: {}\n", .{ count, @errorName(err) }) catch process.exit(1); - }, - .Fail => |msgs| { - stderr.print("Build {} compile errors:\n", .{count}) catch process.exit(1); - for (msgs) |msg| { - defer msg.destroy(); - msg.printToFile(stderr_file, color) catch process.exit(1); + const zir_out_path: ?[]const u8 = switch (emit_zir) { + .no => null, + .yes_default_path => blk: { + if (root_src_file) |rsf| { + if (mem.endsWith(u8, rsf, ".zir")) { + break :blk try std.fmt.allocPrint(arena, "{}.out.zir", .{root_name}); } - }, + } + break :blk try std.fmt.allocPrint(arena, "{}.zir", .{root_name}); + }, + .yes => |p| p, + }; + + var bin_file = try link.openBinFilePath(gpa, fs.cwd(), 
bin_path, .{ + .target = target_info.target, + .output_mode = output_mode, + .link_mode = if (is_dynamic) .Dynamic else .Static, + .object_format = object_format orelse target_info.target.getObjectFormat(), + }); + defer bin_file.deinit(); + + var module = blk: { + const root_pkg = try Package.create(gpa, fs.cwd(), ".", src_path); + errdefer root_pkg.destroy(); + + const root_scope = try gpa.create(ir.Module.Scope.ZIRModule); + errdefer gpa.destroy(root_scope); + root_scope.* = .{ + .sub_file_path = root_pkg.root_src_path, + .source = .{ .unloaded = {} }, + .contents = .{ .not_available = {} }, + .status = .never_loaded, + }; + + break :blk ir.Module{ + .allocator = gpa, + .root_pkg = root_pkg, + .root_scope = root_scope, + .bin_file = &bin_file, + .optimize_mode = .Debug, + .decl_table = std.AutoHashMap(ir.Module.Decl.Hash, *ir.Module.Decl).init(gpa), + .decl_exports = std.AutoHashMap(*ir.Module.Decl, []*ir.Module.Export).init(gpa), + .export_owners = std.AutoHashMap(*ir.Module.Decl, []*ir.Module.Export).init(gpa), + .failed_decls = std.AutoHashMap(*ir.Module.Decl, *ir.ErrorMsg).init(gpa), + .failed_files = std.AutoHashMap(*ir.Module.Scope.ZIRModule, *ir.ErrorMsg).init(gpa), + .failed_exports = std.AutoHashMap(*ir.Module.Export, *ir.ErrorMsg).init(gpa), + .work_queue = std.fifo.LinearFifo(ir.Module.WorkItem, .Dynamic).init(gpa), + }; + }; + defer module.deinit(); + + const stdin = std.io.getStdIn().inStream(); + const stderr = std.io.getStdErr().outStream(); + var repl_buf: [1024]u8 = undefined; + + try updateModule(gpa, &module, zir_out_path); + + while (watch) { + try stderr.print("🦎 ", .{}); + if (stdin.readUntilDelimiterOrEof(&repl_buf, '\n') catch |err| { + try stderr.print("\nUnable to parse command: {}\n", .{@errorName(err)}); + continue; + }) |line| { + if (mem.eql(u8, line, "update")) { + try updateModule(gpa, &module, zir_out_path); + } else if (mem.eql(u8, line, "exit")) { + break; + } else if (mem.eql(u8, line, "help")) { + try stderr.writeAll(repl_help); + } else { + try stderr.print("unknown command: {}\n", .{line}); + } + } else { + break; } } } +fn updateModule(gpa: *Allocator, module: *ir.Module, zir_out_path: ?[]const u8) !void { + try module.update(); + + var errors = try module.getAllErrorsAlloc(); + defer errors.deinit(module.allocator); + + if (errors.list.len != 0) { + for (errors.list) |full_err_msg| { + std.debug.warn("{}:{}:{}: error: {}\n", .{ + full_err_msg.src_path, + full_err_msg.line + 1, + full_err_msg.column + 1, + full_err_msg.msg, + }); + } + } + + if (zir_out_path) |zop| { + var new_zir_module = try ir.text.emit_zir(gpa, module.*); + defer new_zir_module.deinit(gpa); + + const baf = try io.BufferedAtomicFile.create(gpa, fs.cwd(), zop, .{}); + defer baf.destroy(); + + try new_zir_module.writeToStream(gpa, baf.stream()); + + try baf.finish(); + } +} + +const repl_help = + \\Commands: + \\ update Detect changes to source files and update output files. + \\ help Print this text + \\ exit Quit this repl + \\ +; + pub const usage_fmt = \\usage: zig fmt [file]... 
\\ @@ -539,17 +547,17 @@ pub const usage_fmt = ; const Fmt = struct { - seen: event.Locked(SeenMap), + seen: SeenMap, any_error: bool, - color: errmsg.Color, - allocator: *Allocator, + color: Color, + gpa: *Allocator, - const SeenMap = std.StringHashMap(void); + const SeenMap = std.BufSet; }; -fn parseLibcPaths(allocator: *Allocator, libc: *LibCInstallation, libc_paths_file: []const u8) void { +fn parseLibcPaths(gpa: *Allocator, libc: *LibCInstallation, libc_paths_file: []const u8) void { const stderr = io.getStdErr().outStream(); - libc.* = LibCInstallation.parse(allocator, libc_paths_file, stderr) catch |err| { + libc.* = LibCInstallation.parse(gpa, libc_paths_file, stderr) catch |err| { stderr.print("Unable to parse libc path file '{}': {}.\n" ++ "Try running `zig libc` to see an example for the native target.\n", .{ libc_paths_file, @@ -559,13 +567,13 @@ fn parseLibcPaths(allocator: *Allocator, libc: *LibCInstallation, libc_paths_fil }; } -fn cmdLibC(allocator: *Allocator, args: []const []const u8) !void { +fn cmdLibC(gpa: *Allocator, args: []const []const u8) !void { const stderr = io.getStdErr().outStream(); switch (args.len) { 0 => {}, 1 => { var libc_installation: LibCInstallation = undefined; - parseLibcPaths(allocator, &libc_installation, args[0]); + parseLibcPaths(gpa, &libc_installation, args[0]); return; }, else => { @@ -574,23 +582,20 @@ fn cmdLibC(allocator: *Allocator, args: []const []const u8) !void { }, } - var zig_compiler = try ZigCompiler.init(allocator); - defer zig_compiler.deinit(); - - const libc = zig_compiler.getNativeLibC() catch |err| { + const libc = LibCInstallation.findNative(.{ .allocator = gpa }) catch |err| { stderr.print("unable to find libc: {}\n", .{@errorName(err)}) catch {}; process.exit(1); }; + libc.render(io.getStdOut().outStream()) catch process.exit(1); } -fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { +pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void { const stderr_file = io.getStdErr(); - const stderr = stderr_file.outStream(); - var color: errmsg.Color = .Auto; + var color: Color = .Auto; var stdin_flag: bool = false; var check_flag: bool = false; - var input_files = ArrayList([]const u8).init(allocator); + var input_files = ArrayList([]const u8).init(gpa); { var i: usize = 0; @@ -603,7 +608,7 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { process.exit(0); } else if (mem.eql(u8, arg, "--color")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected [auto|on|off] after --color\n"); + std.debug.warn("expected [auto|on|off] after --color\n", .{}); process.exit(1); } i += 1; @@ -615,7 +620,7 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { } else if (mem.eql(u8, next_arg, "off")) { color = .Off; } else { - try stderr.print("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); + std.debug.warn("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); process.exit(1); } } else if (mem.eql(u8, arg, "--stdin")) { @@ -623,7 +628,7 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { } else if (mem.eql(u8, arg, "--check")) { check_flag = true; } else { - try stderr.print("unrecognized parameter: '{}'", .{arg}); + std.debug.warn("unrecognized parameter: '{}'", .{arg}); process.exit(1); } } else { @@ -633,60 +638,55 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { } if (stdin_flag) { - if (input_files.len != 0) { - try stderr.writeAll("cannot use --stdin with positional arguments\n"); + if (input_files.items.len 
!= 0) { + std.debug.warn("cannot use --stdin with positional arguments\n", .{}); process.exit(1); } const stdin = io.getStdIn().inStream(); - const source_code = try stdin.readAllAlloc(allocator, max_src_size); - defer allocator.free(source_code); + const source_code = try stdin.readAllAlloc(gpa, max_src_size); + defer gpa.free(source_code); - const tree = std.zig.parse(allocator, source_code) catch |err| { - try stderr.print("error parsing stdin: {}\n", .{err}); + const tree = std.zig.parse(gpa, source_code) catch |err| { + std.debug.warn("error parsing stdin: {}\n", .{err}); process.exit(1); }; defer tree.deinit(); var error_it = tree.errors.iterator(0); while (error_it.next()) |parse_error| { - const msg = try errmsg.Msg.createFromParseError(allocator, parse_error, tree, ""); - defer msg.destroy(); - - try msg.printToFile(io.getStdErr(), color); + try printErrMsgToFile(gpa, parse_error, tree, "", stderr_file, color); } if (tree.errors.len != 0) { process.exit(1); } if (check_flag) { - const anything_changed = try std.zig.render(allocator, io.null_out_stream, tree); - const code: u8 = if (anything_changed) 1 else 0; + const anything_changed = try std.zig.render(gpa, io.null_out_stream, tree); + const code = if (anything_changed) @as(u8, 1) else @as(u8, 0); process.exit(code); } const stdout = io.getStdOut().outStream(); - _ = try std.zig.render(allocator, stdout, tree); + _ = try std.zig.render(gpa, stdout, tree); return; } - if (input_files.len == 0) { - try stderr.writeAll("expected at least one source file argument\n"); + if (input_files.items.len == 0) { + std.debug.warn("expected at least one source file argument\n", .{}); process.exit(1); } var fmt = Fmt{ - .allocator = allocator, - .seen = event.Locked(Fmt.SeenMap).init(Fmt.SeenMap.init(allocator)), + .gpa = gpa, + .seen = Fmt.SeenMap.init(gpa), .any_error = false, .color = color, }; - var group = event.Group(FmtError!void).init(allocator); for (input_files.span()) |file_path| { - try group.call(fmtPath, .{ &fmt, file_path, check_flag }); + try fmtPath(&fmt, file_path, check_flag); } - try group.wait(); if (fmt.any_error) { process.exit(1); } @@ -711,54 +711,45 @@ const FmtError = error{ ReadOnlyFileSystem, LinkQuotaExceeded, FileBusy, - CurrentWorkingDirectoryUnlinked, } || fs.File.OpenError; -async fn fmtPath(fmt: *Fmt, file_path_ref: []const u8, check_mode: bool) FmtError!void { - const stderr_file = io.getStdErr(); - const stderr = stderr_file.outStream(); +fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool) FmtError!void { + // get the real path here to avoid Windows failing on relative file paths with . or .. 
in them + var real_path = fs.realpathAlloc(fmt.gpa, file_path) catch |err| { + std.debug.warn("unable to open '{}': {}\n", .{ file_path, err }); + fmt.any_error = true; + return; + }; + defer fmt.gpa.free(real_path); - const file_path = try std.mem.dupe(fmt.allocator, u8, file_path_ref); - defer fmt.allocator.free(file_path); + if (fmt.seen.exists(real_path)) return; + try fmt.seen.put(real_path); - { - const held = fmt.seen.acquire(); - defer held.release(); - - if (try held.value.put(file_path, {})) |_| return; - } - - const source_code = fs.cwd().readFileAlloc( - fmt.allocator, - file_path, - max_src_size, - ) catch |err| switch (err) { + const source_code = fs.cwd().readFileAlloc(fmt.gpa, real_path, max_src_size) catch |err| switch (err) { error.IsDir, error.AccessDenied => { var dir = try fs.cwd().openDir(file_path, .{ .iterate = true }); defer dir.close(); - var group = event.Group(FmtError!void).init(fmt.allocator); - var it = dir.iterate(); - while (try it.next()) |entry| { + var dir_it = dir.iterate(); + + while (try dir_it.next()) |entry| { if (entry.kind == .Directory or mem.endsWith(u8, entry.name, ".zig")) { - const full_path = try fs.path.join(fmt.allocator, &[_][]const u8{ file_path, entry.name }); - @panic("TODO https://github.com/ziglang/zig/issues/3777"); - // try group.call(fmtPath, .{fmt, full_path, check_mode}); + const full_path = try fs.path.join(fmt.gpa, &[_][]const u8{ file_path, entry.name }); + try fmtPath(fmt, full_path, check_mode); } } - return group.wait(); + return; }, else => { - // TODO lock stderr printing - try stderr.print("unable to open '{}': {}\n", .{ file_path, err }); + std.debug.warn("unable to open '{}': {}\n", .{ file_path, err }); fmt.any_error = true; return; }, }; - defer fmt.allocator.free(source_code); + defer fmt.gpa.free(source_code); - const tree = std.zig.parse(fmt.allocator, source_code) catch |err| { - try stderr.print("error parsing file '{}': {}\n", .{ file_path, err }); + const tree = std.zig.parse(fmt.gpa, source_code) catch |err| { + std.debug.warn("error parsing file '{}': {}\n", .{ file_path, err }); fmt.any_error = true; return; }; @@ -766,10 +757,7 @@ async fn fmtPath(fmt: *Fmt, file_path_ref: []const u8, check_mode: bool) FmtErro var error_it = tree.errors.iterator(0); while (error_it.next()) |parse_error| { - const msg = try errmsg.Msg.createFromParseError(fmt.allocator, parse_error, tree, file_path); - defer fmt.allocator.destroy(msg); - - try msg.printToFile(stderr_file, fmt.color); + try printErrMsgToFile(fmt.gpa, parse_error, tree, file_path, std.io.getStdErr(), fmt.color); } if (tree.errors.len != 0) { fmt.any_error = true; @@ -777,32 +765,67 @@ async fn fmtPath(fmt: *Fmt, file_path_ref: []const u8, check_mode: bool) FmtErro } if (check_mode) { - const anything_changed = try std.zig.render(fmt.allocator, io.null_out_stream, tree); + const anything_changed = try std.zig.render(fmt.gpa, io.null_out_stream, tree); if (anything_changed) { - try stderr.print("{}\n", .{file_path}); + std.debug.warn("{}\n", .{file_path}); fmt.any_error = true; } } else { - // TODO make this evented - const baf = try io.BufferedAtomicFile.create(fmt.allocator, file_path); + const baf = try io.BufferedAtomicFile.create(fmt.gpa, fs.cwd(), real_path, .{}); defer baf.destroy(); - const anything_changed = try std.zig.render(fmt.allocator, baf.stream(), tree); + const anything_changed = try std.zig.render(fmt.gpa, baf.stream(), tree); if (anything_changed) { - try stderr.print("{}\n", .{file_path}); + std.debug.warn("{}\n", .{file_path}); try 
baf.finish(); } } } -fn cmdVersion(allocator: *Allocator, args: []const []const u8) !void { - const stdout = io.getStdOut().outStream(); - try stdout.print("{}\n", .{c.ZIG_VERSION_STRING}); -} +fn printErrMsgToFile( + gpa: *mem.Allocator, + parse_error: *const ast.Error, + tree: *ast.Tree, + path: []const u8, + file: fs.File, + color: Color, +) !void { + const color_on = switch (color) { + .Auto => file.isTty(), + .On => true, + .Off => false, + }; + const lok_token = parse_error.loc(); + const span_first = lok_token; + const span_last = lok_token; -fn cmdHelp(allocator: *Allocator, args: []const []const u8) !void { - const stdout = io.getStdOut(); - try stdout.writeAll(usage); + const first_token = tree.tokens.at(span_first); + const last_token = tree.tokens.at(span_last); + const start_loc = tree.tokenLocationPtr(0, first_token); + const end_loc = tree.tokenLocationPtr(first_token.end, last_token); + + var text_buf = std.ArrayList(u8).init(gpa); + defer text_buf.deinit(); + const out_stream = text_buf.outStream(); + try parse_error.render(&tree.tokens, out_stream); + const text = text_buf.span(); + + const stream = file.outStream(); + try stream.print("{}:{}:{}: error: {}\n", .{ path, start_loc.line + 1, start_loc.column + 1, text }); + + if (!color_on) return; + + // Print \r and \t as one space each so that column counts line up + for (tree.source[start_loc.line_start..start_loc.line_end]) |byte| { + try stream.writeByte(switch (byte) { + '\r', '\t' => ' ', + else => byte, + }); + } + try stream.writeByte('\n'); + try stream.writeByteNTimes(' ', start_loc.column); + try stream.writeByteNTimes('~', last_token.end - first_token.start); + try stream.writeByte('\n'); } pub const info_zen = @@ -817,90 +840,8 @@ pub const info_zen = \\ * Avoid local maximums. \\ * Reduce the amount one must remember. \\ * Minimize energy spent on coding style. + \\ * Resource deallocation must succeed. \\ * Together we serve end users. 
\\ \\ ; - -fn cmdZen(allocator: *Allocator, args: []const []const u8) !void { - try io.getStdOut().writeAll(info_zen); -} - -const usage_internal = - \\usage: zig internal [subcommand] - \\ - \\Sub-Commands: - \\ build-info Print static compiler build-info - \\ - \\ -; - -fn cmdInternal(allocator: *Allocator, args: []const []const u8) !void { - const stderr = io.getStdErr().outStream(); - if (args.len == 0) { - try stderr.writeAll(usage_internal); - process.exit(1); - } - - const sub_commands = [_]Command{Command{ - .name = "build-info", - .exec = cmdInternalBuildInfo, - }}; - - inline for (sub_commands) |sub_command| { - if (mem.eql(u8, sub_command.name, args[0])) { - var frame = try allocator.create(@Frame(sub_command.exec)); - defer allocator.destroy(frame); - frame.* = async sub_command.exec(allocator, args[1..]); - return await frame; - } - } - - try stderr.print("unknown sub command: {}\n\n", .{args[0]}); - try stderr.writeAll(usage_internal); -} - -fn cmdInternalBuildInfo(allocator: *Allocator, args: []const []const u8) !void { - const stdout = io.getStdOut().outStream(); - try stdout.print( - \\ZIG_CMAKE_BINARY_DIR {} - \\ZIG_CXX_COMPILER {} - \\ZIG_LLD_INCLUDE_PATH {} - \\ZIG_LLD_LIBRARIES {} - \\ZIG_LLVM_CONFIG_EXE {} - \\ZIG_DIA_GUIDS_LIB {} - \\ - , .{ - c.ZIG_CMAKE_BINARY_DIR, - c.ZIG_CXX_COMPILER, - c.ZIG_LLD_INCLUDE_PATH, - c.ZIG_LLD_LIBRARIES, - c.ZIG_LLVM_CONFIG_EXE, - c.ZIG_DIA_GUIDS_LIB, - }); -} - -const CliPkg = struct { - name: []const u8, - path: []const u8, - children: ArrayList(*CliPkg), - parent: ?*CliPkg, - - pub fn init(allocator: *mem.Allocator, name: []const u8, path: []const u8, parent: ?*CliPkg) !*CliPkg { - var pkg = try allocator.create(CliPkg); - pkg.* = CliPkg{ - .name = name, - .path = path, - .children = ArrayList(*CliPkg).init(allocator), - .parent = parent, - }; - return pkg; - } - - pub fn deinit(self: *CliPkg) void { - for (self.children.span()) |child| { - child.deinit(); - } - self.children.deinit(); - } -}; diff --git a/src-self-hosted/stage2.zig b/src-self-hosted/stage2.zig index 38ab49ccc4..d1bf73bb9e 100644 --- a/src-self-hosted/stage2.zig +++ b/src-self-hosted/stage2.zig @@ -12,7 +12,6 @@ const ArrayListSentineled = std.ArrayListSentineled; const Target = std.Target; const CrossTarget = std.zig.CrossTarget; const self_hosted_main = @import("main.zig"); -const errmsg = @import("errmsg.zig"); const DepTokenizer = @import("dep_tokenizer.zig").Tokenizer; const assert = std.debug.assert; const LibCInstallation = @import("libc_installation.zig").LibCInstallation; @@ -168,8 +167,6 @@ export fn stage2_render_ast(tree: *ast.Tree, output_file: *FILE) Error { return .None; } -// TODO: just use the actual self-hosted zig fmt. Until https://github.com/ziglang/zig/issues/2377, -// we use a blocking implementation. 
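// Note: the export below is the C ABI entry point that stage1 calls into.
// After this change it only converts `argv` into a slice of arguments and
// forwards to the self-hosted `cmdFmt`, instead of carrying its own copy of
// the formatting logic as it did before this patch.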
export fn stage2_fmt(argc: c_int, argv: [*]const [*:0]const u8) c_int { if (std.debug.runtime_safety) { fmtMain(argc, argv) catch unreachable; @@ -191,258 +188,9 @@ fn fmtMain(argc: c_int, argv: [*]const [*:0]const u8) !void { try args_list.append(mem.spanZ(argv[arg_i])); } - stdout = std.io.getStdOut().outStream(); - stderr_file = std.io.getStdErr(); - stderr = stderr_file.outStream(); - const args = args_list.span()[2..]; - var color: errmsg.Color = .Auto; - var stdin_flag: bool = false; - var check_flag: bool = false; - var input_files = ArrayList([]const u8).init(allocator); - - { - var i: usize = 0; - while (i < args.len) : (i += 1) { - const arg = args[i]; - if (mem.startsWith(u8, arg, "-")) { - if (mem.eql(u8, arg, "--help")) { - try stdout.writeAll(self_hosted_main.usage_fmt); - process.exit(0); - } else if (mem.eql(u8, arg, "--color")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected [auto|on|off] after --color\n"); - process.exit(1); - } - i += 1; - const next_arg = args[i]; - if (mem.eql(u8, next_arg, "auto")) { - color = .Auto; - } else if (mem.eql(u8, next_arg, "on")) { - color = .On; - } else if (mem.eql(u8, next_arg, "off")) { - color = .Off; - } else { - try stderr.print("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); - process.exit(1); - } - } else if (mem.eql(u8, arg, "--stdin")) { - stdin_flag = true; - } else if (mem.eql(u8, arg, "--check")) { - check_flag = true; - } else { - try stderr.print("unrecognized parameter: '{}'", .{arg}); - process.exit(1); - } - } else { - try input_files.append(arg); - } - } - } - - if (stdin_flag) { - if (input_files.items.len != 0) { - try stderr.writeAll("cannot use --stdin with positional arguments\n"); - process.exit(1); - } - - const stdin_file = io.getStdIn(); - var stdin = stdin_file.inStream(); - - const source_code = try stdin.readAllAlloc(allocator, self_hosted_main.max_src_size); - defer allocator.free(source_code); - - const tree = std.zig.parse(allocator, source_code) catch |err| { - try stderr.print("error parsing stdin: {}\n", .{err}); - process.exit(1); - }; - defer tree.deinit(); - - var error_it = tree.errors.iterator(0); - while (error_it.next()) |parse_error| { - try printErrMsgToFile(allocator, parse_error, tree, "", stderr_file, color); - } - if (tree.errors.len != 0) { - process.exit(1); - } - if (check_flag) { - const anything_changed = try std.zig.render(allocator, io.null_out_stream, tree); - const code = if (anything_changed) @as(u8, 1) else @as(u8, 0); - process.exit(code); - } - - _ = try std.zig.render(allocator, stdout, tree); - return; - } - - if (input_files.items.len == 0) { - try stderr.writeAll("expected at least one source file argument\n"); - process.exit(1); - } - - var fmt = Fmt{ - .seen = Fmt.SeenMap.init(allocator), - .any_error = false, - .color = color, - .allocator = allocator, - }; - - for (input_files.span()) |file_path| { - try fmtPath(&fmt, file_path, check_flag); - } - if (fmt.any_error) { - process.exit(1); - } -} - -const FmtError = error{ - SystemResources, - OperationAborted, - IoPending, - BrokenPipe, - Unexpected, - WouldBlock, - FileClosed, - DestinationAddressRequired, - DiskQuota, - FileTooBig, - InputOutput, - NoSpaceLeft, - AccessDenied, - OutOfMemory, - RenameAcrossMountPoints, - ReadOnlyFileSystem, - LinkQuotaExceeded, - FileBusy, -} || fs.File.OpenError; - -fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool) FmtError!void { - // get the real path here to avoid Windows failing on relative file paths with . or .. 
in them - var real_path = fs.realpathAlloc(fmt.allocator, file_path) catch |err| { - try stderr.print("unable to open '{}': {}\n", .{ file_path, err }); - fmt.any_error = true; - return; - }; - defer fmt.allocator.free(real_path); - - if (fmt.seen.exists(real_path)) return; - try fmt.seen.put(real_path); - - const source_code = fs.cwd().readFileAlloc(fmt.allocator, real_path, self_hosted_main.max_src_size) catch |err| switch (err) { - error.IsDir, error.AccessDenied => { - // TODO make event based (and dir.next()) - var dir = try fs.cwd().openDir(file_path, .{ .iterate = true }); - defer dir.close(); - - var dir_it = dir.iterate(); - - while (try dir_it.next()) |entry| { - if (entry.kind == .Directory or mem.endsWith(u8, entry.name, ".zig")) { - const full_path = try fs.path.join(fmt.allocator, &[_][]const u8{ file_path, entry.name }); - try fmtPath(fmt, full_path, check_mode); - } - } - return; - }, - else => { - // TODO lock stderr printing - try stderr.print("unable to open '{}': {}\n", .{ file_path, err }); - fmt.any_error = true; - return; - }, - }; - defer fmt.allocator.free(source_code); - - const tree = std.zig.parse(fmt.allocator, source_code) catch |err| { - try stderr.print("error parsing file '{}': {}\n", .{ file_path, err }); - fmt.any_error = true; - return; - }; - defer tree.deinit(); - - var error_it = tree.errors.iterator(0); - while (error_it.next()) |parse_error| { - try printErrMsgToFile(fmt.allocator, parse_error, tree, file_path, stderr_file, fmt.color); - } - if (tree.errors.len != 0) { - fmt.any_error = true; - return; - } - - if (check_mode) { - const anything_changed = try std.zig.render(fmt.allocator, io.null_out_stream, tree); - if (anything_changed) { - try stderr.print("{}\n", .{file_path}); - fmt.any_error = true; - } - } else { - const baf = try io.BufferedAtomicFile.create(fmt.allocator, fs.cwd(), real_path, .{}); - defer baf.destroy(); - - const anything_changed = try std.zig.render(fmt.allocator, baf.stream(), tree); - if (anything_changed) { - try stderr.print("{}\n", .{file_path}); - try baf.finish(); - } - } -} - -const Fmt = struct { - seen: SeenMap, - any_error: bool, - color: errmsg.Color, - allocator: *mem.Allocator, - - const SeenMap = std.BufSet; -}; - -fn printErrMsgToFile( - allocator: *mem.Allocator, - parse_error: *const ast.Error, - tree: *ast.Tree, - path: []const u8, - file: fs.File, - color: errmsg.Color, -) !void { - const color_on = switch (color) { - .Auto => file.isTty(), - .On => true, - .Off => false, - }; - const lok_token = parse_error.loc(); - const span = errmsg.Span{ - .first = lok_token, - .last = lok_token, - }; - - const first_token = tree.tokens.at(span.first); - const last_token = tree.tokens.at(span.last); - const start_loc = tree.tokenLocationPtr(0, first_token); - const end_loc = tree.tokenLocationPtr(first_token.end, last_token); - - var text_buf = std.ArrayList(u8).init(allocator); - defer text_buf.deinit(); - const out_stream = text_buf.outStream(); - try parse_error.render(&tree.tokens, out_stream); - const text = text_buf.span(); - - const stream = file.outStream(); - try stream.print("{}:{}:{}: error: {}\n", .{ path, start_loc.line + 1, start_loc.column + 1, text }); - - if (!color_on) return; - - // Print \r and \t as one space each so that column counts line up - for (tree.source[start_loc.line_start..start_loc.line_end]) |byte| { - try stream.writeByte(switch (byte) { - '\r', '\t' => ' ', - else => byte, - }); - } - try stream.writeByte('\n'); - try stream.writeByteNTimes(' ', start_loc.column); - try 
stream.writeByteNTimes('~', last_token.end - first_token.start); - try stream.writeByte('\n'); + return self_hosted_main.cmdFmt(allocator, args); } export fn stage2_DepTokenizer_init(input: [*]const u8, len: usize) stage2_DepTokenizer { From 8980f150e94f5542f5228e371fd2919a9922dc12 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 15 May 2020 18:54:48 -0400 Subject: [PATCH 14/31] fix memory leaks of one of the ZIR test cases --- src-self-hosted/ir.zig | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index cf2b65c719..2052479dae 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -487,11 +487,13 @@ pub const Module = struct { .loaded_success => { allocator.free(self.source.bytes); self.contents.module.deinit(allocator); + allocator.destroy(self.contents.module); self.status = .unloaded_success; }, .loaded_sema_failure => { allocator.free(self.source.bytes); self.contents.module.deinit(allocator); + allocator.destroy(self.contents.module); self.status = .unloaded_sema_failure; }, .loaded_parse_failure => { @@ -603,7 +605,14 @@ pub const Module = struct { } self.failed_exports.deinit(); } - self.decl_exports.deinit(); + { + var it = self.decl_exports.iterator(); + while (it.next()) |kv| { + const export_list = kv.value; + allocator.free(export_list); + } + self.decl_exports.deinit(); + } { var it = self.export_owners.iterator(); while (it.next()) |kv| { @@ -613,7 +622,7 @@ pub const Module = struct { } allocator.free(export_list); } - self.failed_exports.deinit(); + self.export_owners.deinit(); } self.root_pkg.destroy(); { From 5135238f86c3fd3de423f5d04fe22717f2eb59ae Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 15 May 2020 19:11:00 -0400 Subject: [PATCH 15/31] ZIR: emit proper string literals --- src-self-hosted/ir/text.zig | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index e0cc4e122c..d8b5af4e2c 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -1456,6 +1456,21 @@ const EmitZIR = struct { .kw_args = .{}, }; try self.decls.append(self.allocator, &str_inst.base); - return &str_inst.base; + + const ref_inst = try self.arena.allocator.create(Inst.Ref); + ref_inst.* = .{ + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Ref.base_tag, + }, + .positionals = .{ + .operand = &str_inst.base, + }, + .kw_args = .{}, + }; + try self.decls.append(self.allocator, &ref_inst.base); + + return &ref_inst.base; } }; From 64f4ef75566ef34289e9e6a455b0173e4e58df47 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 15 May 2020 21:29:52 -0400 Subject: [PATCH 16/31] update ZIR test cases --- test/stage2/zir.zig | 99 ++++++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/test/stage2/zir.zig b/test/stage2/zir.zig index 9a65e9ab96..78b6d3c1b2 100644 --- a/test/stage2/zir.zig +++ b/test/stage2/zir.zig @@ -12,10 +12,11 @@ pub fn addCases(ctx: *TestContext) void { \\ \\@entry = fn(@fnty, { \\ %a = str("\x32\x08\x01\x0a") - \\ %eptr0 = elemptr(%a, @0) - \\ %eptr1 = elemptr(%a, @1) - \\ %eptr2 = elemptr(%a, @2) - \\ %eptr3 = elemptr(%a, @3) + \\ %aref = ref(%a) + \\ %eptr0 = elemptr(%aref, @0) + \\ %eptr1 = elemptr(%aref, @1) + \\ %eptr2 = elemptr(%aref, @2) + \\ %eptr3 = elemptr(%aref, @3) \\ %v0 = deref(%eptr0) \\ %v1 = deref(%eptr1) \\ %v2 = deref(%eptr2) @@ -34,7 +35,8 @@ pub fn addCases(ctx: *TestContext) 
void { \\}) \\ \\@9 = str("entry") - \\@10 = export(@9, @entry) + \\@10 = ref(@9) + \\@11 = export(@10, @entry) , \\@0 = primitive(void) \\@1 = fntype([], @0, cc=C) @@ -42,7 +44,8 @@ pub fn addCases(ctx: *TestContext) void { \\ %0 = return() \\}) \\@3 = str("entry") - \\@4 = export(@3, @2) + \\@4 = ref(@3) + \\@5 = export(@4, @2) \\ ); @@ -55,51 +58,71 @@ pub fn addCases(ctx: *TestContext) void { } ctx.addZIRCompareOutput("hello world ZIR", - \\@0 = str("Hello, world!\n") - \\@1 = primitive(noreturn) - \\@2 = primitive(usize) - \\@3 = fntype([], @1, cc=Naked) - \\@4 = int(0) - \\@5 = int(1) - \\@6 = int(231) - \\@7 = str("len") + \\@noreturn = primitive(noreturn) + \\@void = primitive(void) + \\@usize = primitive(usize) + \\@0 = int(0) + \\@1 = int(1) + \\@2 = int(2) + \\@3 = int(3) \\ - \\@8 = fn(@3, { - \\ %0 = as(@2, @5) ; SYS_write - \\ %1 = as(@2, @5) ; STDOUT_FILENO - \\ %2 = ptrtoint(@0) ; msg ptr - \\ %3 = fieldptr(@0, @7) ; msg len ptr - \\ %4 = deref(%3) ; msg len - \\ %sysoutreg = str("={rax}") - \\ %rax = str("{rax}") - \\ %rdi = str("{rdi}") - \\ %rsi = str("{rsi}") - \\ %rdx = str("{rdx}") - \\ %rcx = str("rcx") - \\ %r11 = str("r11") - \\ %memory = str("memory") - \\ %syscall = str("syscall") - \\ %5 = asm(%syscall, @2, + \\@syscall_array = str("syscall") + \\@sysoutreg_array = str("={rax}") + \\@rax_array = str("{rax}") + \\@rdi_array = str("{rdi}") + \\@rcx_array = str("rcx") + \\@r11_array = str("r11") + \\@rdx_array = str("{rdx}") + \\@rsi_array = str("{rsi}") + \\@memory_array = str("memory") + \\@len_array = str("len") + \\ + \\@msg = str("Hello, world!\n") + \\ + \\@start_fnty = fntype([], @noreturn, cc=Naked) + \\@start = fn(@start_fnty, { + \\ %SYS_exit_group = int(231) + \\ %exit_code = as(@usize, @0) + \\ + \\ %syscall = ref(@syscall_array) + \\ %sysoutreg = ref(@sysoutreg_array) + \\ %rax = ref(@rax_array) + \\ %rdi = ref(@rdi_array) + \\ %rcx = ref(@rcx_array) + \\ %rdx = ref(@rdx_array) + \\ %rsi = ref(@rsi_array) + \\ %r11 = ref(@r11_array) + \\ %memory = ref(@memory_array) + \\ + \\ %SYS_write = as(@usize, @1) + \\ %STDOUT_FILENO = as(@usize, @1) + \\ + \\ %msg_ptr = ref(@msg) + \\ %msg_addr = ptrtoint(%msg_ptr) + \\ + \\ %len_name = ref(@len_array) + \\ %msg_len_ptr = fieldptr(%msg_ptr, %len_name) + \\ %msg_len = deref(%msg_len_ptr) + \\ %rc_write = asm(%syscall, @usize, \\ volatile=1, \\ output=%sysoutreg, \\ inputs=[%rax, %rdi, %rsi, %rdx], \\ clobbers=[%rcx, %r11, %memory], - \\ args=[%0, %1, %2, %4]) + \\ args=[%SYS_write, %STDOUT_FILENO, %msg_addr, %msg_len]) \\ - \\ %6 = as(@2, @6) ;SYS_exit_group - \\ %7 = as(@2, @4) ;exit code - \\ %8 = asm(%syscall, @2, + \\ %rc_exit = asm(%syscall, @usize, \\ volatile=1, \\ output=%sysoutreg, \\ inputs=[%rax, %rdi], \\ clobbers=[%rcx, %r11, %memory], - \\ args=[%6, %7]) + \\ args=[%SYS_exit_group, %exit_code]) \\ - \\ %9 = unreachable() - \\}) + \\ %99 = unreachable() + \\}); \\ \\@9 = str("_start") - \\@10 = export(@9, @8) + \\@10 = ref(@9) + \\@11 = export(@10, @start) , \\Hello, world! 
\\ From f2feb4e47aa7d74f26f5bda1f8383ccd0f54026a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 15 May 2020 21:44:33 -0400 Subject: [PATCH 17/31] move Module to its own file --- src-self-hosted/Module.zig | 2018 ++++++++++++++++++++++ src-self-hosted/codegen.zig | 18 +- src-self-hosted/ir.zig | 2018 +--------------------- src-self-hosted/link.zig | 21 +- src-self-hosted/main.zig | 25 +- src-self-hosted/value.zig | 6 +- src-self-hosted/{ir/text.zig => zir.zig} | 15 +- 7 files changed, 2065 insertions(+), 2056 deletions(-) create mode 100644 src-self-hosted/Module.zig rename src-self-hosted/{ir/text.zig => zir.zig} (99%) diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig new file mode 100644 index 0000000000..ef85f50e31 --- /dev/null +++ b/src-self-hosted/Module.zig @@ -0,0 +1,2018 @@ +const std = @import("std"); +const mem = std.mem; +const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; +const Value = @import("value.zig").Value; +const Type = @import("type.zig").Type; +const TypedValue = @import("TypedValue.zig"); +const assert = std.debug.assert; +const BigIntConst = std.math.big.int.Const; +const BigIntMutable = std.math.big.int.Mutable; +const Target = std.Target; +const Package = @import("Package.zig"); +const link = @import("link.zig"); +const ir = @import("ir.zig"); +const zir = @import("zir.zig"); +const Module = @This(); +const Inst = ir.Inst; + +/// General-purpose allocator. +allocator: *Allocator, +/// Module owns this resource. +root_pkg: *Package, +/// Module owns this resource. +root_scope: *Scope.ZIRModule, +/// Pointer to externally managed resource. +bin_file: *link.ElfFile, +/// It's rare for a decl to be exported, so we save memory by having a sparse map of +/// Decl pointers to details about them being exported. +/// The Export memory is owned by the `export_owners` table; the slice itself is owned by this table. +decl_exports: std.AutoHashMap(*Decl, []*Export), +/// This models the Decls that perform exports, so that `decl_exports` can be updated when a Decl +/// is modified. Note that the key of this table is not the Decl being exported, but the Decl that +/// is performing the export of another Decl. +/// This table owns the Export memory. +export_owners: std.AutoHashMap(*Decl, []*Export), +/// Maps fully qualified namespaced names to the Decl struct for them. +decl_table: std.AutoHashMap(Decl.Hash, *Decl), + +optimize_mode: std.builtin.Mode, +link_error_flags: link.ElfFile.ErrorFlags = link.ElfFile.ErrorFlags{}, + +work_queue: std.fifo.LinearFifo(WorkItem, .Dynamic), + +/// We optimize memory usage for a compilation with no compile errors by storing the +/// error messages and mapping outside of `Decl`. +/// The ErrorMsg memory is owned by the decl, using Module's allocator. +/// Note that a Decl can succeed but the Fn it represents can fail. In this case, +/// a Decl can have a failed_decls entry but have analysis status of success. +failed_decls: std.AutoHashMap(*Decl, *ErrorMsg), +/// Using a map here for consistency with the other fields here. +/// The ErrorMsg memory is owned by the `Scope.ZIRModule`, using Module's allocator. +failed_files: std.AutoHashMap(*Scope.ZIRModule, *ErrorMsg), +/// Using a map here for consistency with the other fields here. +/// The ErrorMsg memory is owned by the `Export`, using Module's allocator. +failed_exports: std.AutoHashMap(*Export, *ErrorMsg), + +pub const WorkItem = union(enum) { + /// Write the machine code for a Decl to the output file. 
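/// Queued by `resolveDecl` once a Decl has a value with runtime bits, and
/// serviced by `performAllTheWork`, which ultimately calls
/// `bin_file.updateDecl` for the Decl.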
+    codegen_decl: *Decl,
+};
+
+pub const Export = struct {
+    options: std.builtin.ExportOptions,
+    /// Byte offset into the file that contains the export directive.
+    src: usize,
+    /// Represents the position of the export, if any, in the output file.
+    link: link.ElfFile.Export,
+    /// The Decl that performs the export. Note that this is *not* the Decl being exported.
+    owner_decl: *Decl,
+    status: enum {
+        in_progress,
+        failed,
+        /// Indicates that the failure was due to a temporary issue, such as an I/O error
+        /// when writing to the output file. Retrying the export may succeed.
+        failed_retryable,
+        complete,
+    },
+};
+
+pub const Decl = struct {
+    /// This name is relative to the containing namespace of the decl. It uses a null-termination
+    /// to save bytes, since there can be a lot of decls in a compilation. The null byte is not allowed
+    /// in symbol names, because executable file formats use null-terminated strings for symbol names.
+    /// All Decls have names, even values that are not bound to a zig namespace. This is necessary for
+    /// mapping them to an address in the output file.
+    /// Memory owned by this decl, using Module's allocator.
+    name: [*:0]const u8,
+    /// The direct parent container of the Decl. This field will need to get more fleshed out when
+    /// self-hosted supports proper struct types and Zig AST => ZIR.
+    /// Reference to externally owned memory.
+    scope: *Scope.ZIRModule,
+    /// Byte offset into the source file that contains this declaration.
+    /// This is the base offset that src offsets within this Decl are relative to.
+    src: usize,
+    /// The most recent value of the Decl after a successful semantic analysis.
+    /// The tag for this union is determined by the tag value of the analysis field.
+    typed_value: union {
+        never_succeeded: void,
+        most_recent: TypedValue.Managed,
+    },
+    /// Represents the "shallow" analysis status. For example, for decls that are functions,
+    /// the function type is analyzed with this set to `in_progress`, however, the semantic
+    /// analysis of the function body is performed with this value set to `success`. Functions
+    /// have their own analysis status field.
+    analysis: enum {
+        initial_in_progress,
+        /// This Decl might be OK but it depends on another one which did not successfully complete
+        /// semantic analysis. This Decl never had a value computed.
+        initial_dependency_failure,
+        /// Semantic analysis failure. This Decl never had a value computed.
+        /// There will be a corresponding ErrorMsg in Module.failed_decls.
+        initial_sema_failure,
+        /// In this case the `typed_value.most_recent` can still be accessed.
+        /// There will be a corresponding ErrorMsg in Module.failed_decls.
+        codegen_failure,
+        /// In this case the `typed_value.most_recent` can still be accessed.
+        /// There will be a corresponding ErrorMsg in Module.failed_decls.
+        /// This indicates the failure was something like running out of disk space,
+        /// and attempting codegen again may succeed.
+        codegen_failure_retryable,
+        /// This Decl might be OK but it depends on another one which did not successfully complete
+        /// semantic analysis. There is a most recent value available.
+        repeat_dependency_failure,
+        /// Semantic analysis failure, but the `typed_value.most_recent` can be accessed.
+        /// There will be a corresponding ErrorMsg in Module.failed_decls.
+        repeat_sema_failure,
+        /// Completed successfully before; the `typed_value.most_recent` can be accessed, and
+        /// new semantic analysis is in progress.
+        repeat_in_progress,
+        /// Everything is done and updated.
+        complete,
+    },
+
+    /// Represents the position of the code in the output file.
+    /// This is populated regardless of semantic analysis and code generation.
+    link: link.ElfFile.Decl = link.ElfFile.Decl.empty,
+
+    /// The shallow set of other decls whose typed_value could possibly change if this Decl's
+    /// typed_value is modified.
+    /// TODO look into using a lightweight map/set data structure rather than a linear array.
+    dependants: ArrayListUnmanaged(*Decl) = ArrayListUnmanaged(*Decl){},
+
+    contents_hash: Hash,
+
+    pub fn destroy(self: *Decl, allocator: *Allocator) void {
+        allocator.free(mem.spanZ(self.name));
+        if (self.typedValueManaged()) |tvm| {
+            tvm.deinit(allocator);
+        }
+        allocator.destroy(self);
+    }
+
+    pub const Hash = [16]u8;
+
+    /// If the name is small enough, it is used directly as the hash.
+    /// If it is long, blake3 hash is computed.
+    pub fn hashSimpleName(name: []const u8) Hash {
+        var out: Hash = undefined;
+        if (name.len <= Hash.len) {
+            mem.copy(u8, &out, name);
+            mem.set(u8, out[name.len..], 0);
+        } else {
+            std.crypto.Blake3.hash(name, &out);
+        }
+        return out;
+    }
+
+    /// Must generate unique bytes with no collisions with other decls.
+    /// The point of hashing here is only to limit the number of bytes of
+    /// the unique identifier to a fixed size (16 bytes).
+    pub fn fullyQualifiedNameHash(self: Decl) Hash {
+        // Right now we only have ZIRModule as the source. So this is simply the
+        // relative name of the decl.
+        return hashSimpleName(mem.spanZ(self.name));
+    }
+
+    pub fn typedValue(self: *Decl) error{AnalysisFail}!TypedValue {
+        const tvm = self.typedValueManaged() orelse return error.AnalysisFail;
+        return tvm.typed_value;
+    }
+
+    pub fn value(self: *Decl) error{AnalysisFail}!Value {
+        return (try self.typedValue()).val;
+    }
+
+    pub fn dump(self: *Decl) void {
+        const loc = std.zig.findLineColumn(self.scope.source.bytes, self.src);
+        std.debug.warn("{}:{}:{} name={} status={}", .{
+            self.scope.sub_file_path,
+            loc.line + 1,
+            loc.column + 1,
+            mem.spanZ(self.name),
+            @tagName(self.analysis),
+        });
+        if (self.typedValueManaged()) |tvm| {
+            std.debug.warn(" ty={} val={}", .{ tvm.typed_value.ty, tvm.typed_value.val });
+        }
+        std.debug.warn("\n", .{});
+    }
+
+    fn typedValueManaged(self: *Decl) ?*TypedValue.Managed {
+        switch (self.analysis) {
+            .initial_in_progress,
+            .initial_dependency_failure,
+            .initial_sema_failure,
+            => return null,
+            .codegen_failure,
+            .codegen_failure_retryable,
+            .repeat_dependency_failure,
+            .repeat_sema_failure,
+            .repeat_in_progress,
+            .complete,
+            => return &self.typed_value.most_recent,
+        }
+    }
+};
+
+/// Fn struct memory is owned by the Decl's TypedValue.Managed arena allocator.
+pub const Fn = struct {
+    /// This memory is owned by the Decl's TypedValue.Managed arena allocator.
+    fn_type: Type,
+    analysis: union(enum) {
+        /// The value is the source instruction.
+        queued: *zir.Inst.Fn,
+        in_progress: *Analysis,
+        /// There will be a corresponding ErrorMsg in Module.failed_decls
+        sema_failure,
+        /// This Fn might be OK but it depends on another Decl which did not successfully complete
+        /// semantic analysis.
+        dependency_failure,
+        success: Body,
+    },
+
+    /// This memory is temporary and points to stack memory for the duration
+    /// of Fn analysis.
+ pub const Analysis = struct { + inner_block: Scope.Block, + /// TODO Performance optimization idea: instead of this inst_table, + /// use a field in the zir.Inst instead to track corresponding instructions + inst_table: std.AutoHashMap(*zir.Inst, *Inst), + needed_inst_capacity: usize, + }; +}; + +pub const Scope = struct { + tag: Tag, + + pub fn cast(base: *Scope, comptime T: type) ?*T { + if (base.tag != T.base_tag) + return null; + + return @fieldParentPtr(T, "base", base); + } + + /// Asserts the scope has a parent which is a DeclAnalysis and + /// returns the arena Allocator. + pub fn arena(self: *Scope) *Allocator { + switch (self.tag) { + .block => return self.cast(Block).?.arena, + .decl => return &self.cast(DeclAnalysis).?.arena.allocator, + .zir_module => return &self.cast(ZIRModule).?.contents.module.arena.allocator, + } + } + + /// Asserts the scope has a parent which is a DeclAnalysis and + /// returns the Decl. + pub fn decl(self: *Scope) *Decl { + switch (self.tag) { + .block => return self.cast(Block).?.decl, + .decl => return self.cast(DeclAnalysis).?.decl, + .zir_module => unreachable, + } + } + + /// Asserts the scope has a parent which is a ZIRModule and + /// returns it. + pub fn namespace(self: *Scope) *ZIRModule { + switch (self.tag) { + .block => return self.cast(Block).?.decl.scope, + .decl => return self.cast(DeclAnalysis).?.decl.scope, + .zir_module => return self.cast(ZIRModule).?, + } + } + + pub fn dumpInst(self: *Scope, inst: *Inst) void { + const zir_module = self.namespace(); + const loc = std.zig.findLineColumn(zir_module.source.bytes, inst.src); + std.debug.warn("{}:{}:{}: {}: ty={}\n", .{ + zir_module.sub_file_path, + loc.line + 1, + loc.column + 1, + @tagName(inst.tag), + inst.ty, + }); + } + + pub const Tag = enum { + zir_module, + block, + decl, + }; + + pub const ZIRModule = struct { + pub const base_tag: Tag = .zir_module; + base: Scope = Scope{ .tag = base_tag }, + /// Relative to the owning package's root_src_dir. + /// Reference to external memory, not owned by ZIRModule. + sub_file_path: []const u8, + source: union { + unloaded: void, + bytes: [:0]const u8, + }, + contents: union { + not_available: void, + module: *zir.Module, + }, + status: enum { + never_loaded, + unloaded_success, + unloaded_parse_failure, + unloaded_sema_failure, + loaded_parse_failure, + loaded_sema_failure, + loaded_success, + }, + + pub fn unload(self: *ZIRModule, allocator: *Allocator) void { + switch (self.status) { + .never_loaded, + .unloaded_parse_failure, + .unloaded_sema_failure, + .unloaded_success, + => {}, + + .loaded_success => { + allocator.free(self.source.bytes); + self.contents.module.deinit(allocator); + allocator.destroy(self.contents.module); + self.status = .unloaded_success; + }, + .loaded_sema_failure => { + allocator.free(self.source.bytes); + self.contents.module.deinit(allocator); + allocator.destroy(self.contents.module); + self.status = .unloaded_sema_failure; + }, + .loaded_parse_failure => { + allocator.free(self.source.bytes); + self.status = .unloaded_parse_failure; + }, + } + } + + pub fn deinit(self: *ZIRModule, allocator: *Allocator) void { + self.unload(allocator); + self.* = undefined; + } + + pub fn dumpSrc(self: *ZIRModule, src: usize) void { + const loc = std.zig.findLineColumn(self.source.bytes, src); + std.debug.warn("{}:{}:{}\n", .{ self.sub_file_path, loc.line + 1, loc.column + 1 }); + } + }; + + /// This is a temporary structure, references to it are valid only + /// during semantic analysis of the block. 
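/// `analyzeFnBody` creates one of these on the stack as `inner_block` and
/// passes `&inner_block.base` down through semantic analysis of the body.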
+ pub const Block = struct { + pub const base_tag: Tag = .block; + base: Scope = Scope{ .tag = base_tag }, + func: *Fn, + decl: *Decl, + instructions: ArrayListUnmanaged(*Inst), + /// Points to the arena allocator of DeclAnalysis + arena: *Allocator, + }; + + /// This is a temporary structure, references to it are valid only + /// during semantic analysis of the decl. + pub const DeclAnalysis = struct { + pub const base_tag: Tag = .decl; + base: Scope = Scope{ .tag = base_tag }, + decl: *Decl, + arena: std.heap.ArenaAllocator, + }; +}; + +pub const Body = struct { + instructions: []*Inst, +}; + +pub const AllErrors = struct { + arena: std.heap.ArenaAllocator.State, + list: []const Message, + + pub const Message = struct { + src_path: []const u8, + line: usize, + column: usize, + byte_offset: usize, + msg: []const u8, + }; + + pub fn deinit(self: *AllErrors, allocator: *Allocator) void { + self.arena.promote(allocator).deinit(); + } + + fn add( + arena: *std.heap.ArenaAllocator, + errors: *std.ArrayList(Message), + sub_file_path: []const u8, + source: []const u8, + simple_err_msg: ErrorMsg, + ) !void { + const loc = std.zig.findLineColumn(source, simple_err_msg.byte_offset); + try errors.append(.{ + .src_path = try arena.allocator.dupe(u8, sub_file_path), + .msg = try arena.allocator.dupe(u8, simple_err_msg.msg), + .byte_offset = simple_err_msg.byte_offset, + .line = loc.line, + .column = loc.column, + }); + } +}; + +pub fn deinit(self: *Module) void { + const allocator = self.allocator; + self.work_queue.deinit(); + { + var it = self.decl_table.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.decl_table.deinit(); + } + { + var it = self.failed_decls.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_decls.deinit(); + } + { + var it = self.failed_files.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_files.deinit(); + } + { + var it = self.failed_exports.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_exports.deinit(); + } + { + var it = self.decl_exports.iterator(); + while (it.next()) |kv| { + const export_list = kv.value; + allocator.free(export_list); + } + self.decl_exports.deinit(); + } + { + var it = self.export_owners.iterator(); + while (it.next()) |kv| { + const export_list = kv.value; + for (export_list) |exp| { + allocator.destroy(exp); + } + allocator.free(export_list); + } + self.export_owners.deinit(); + } + self.root_pkg.destroy(); + { + self.root_scope.deinit(allocator); + allocator.destroy(self.root_scope); + } + self.* = undefined; +} + +pub fn target(self: Module) std.Target { + return self.bin_file.options.target; +} + +/// Detect changes to source files, perform semantic analysis, and update the output files. +pub fn update(self: *Module) !void { + // TODO Use the cache hash file system to detect which source files changed. + // Here we simulate a full cache miss. + // Analyze the root source file now. + self.analyzeRoot(self.root_scope) catch |err| switch (err) { + error.AnalysisFail => { + assert(self.totalErrorCount() != 0); + }, + else => |e| return e, + }; + + try self.performAllTheWork(); + + // Unload all the source files from memory. 
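    // A minimal sketch of the loop a caller runs around `update` (this is
    // what `updateModule` in main.zig above boils down to):
    //
    //     try module.update();
    //     var errors = try module.getAllErrorsAlloc();
    //     defer errors.deinit(module.allocator);
    //     // ...report errors.list, then wait for the next "update"...
    //
    // Unloading here is safe because analysis copied everything long-lived
    // into the Decl arenas, and `getSrcModule` reloads sources on demand.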
+ self.root_scope.unload(self.allocator); + + try self.bin_file.flush(); + self.link_error_flags = self.bin_file.error_flags; +} + +pub fn totalErrorCount(self: *Module) usize { + return self.failed_decls.size + + self.failed_files.size + + self.failed_exports.size + + @boolToInt(self.link_error_flags.no_entry_point_found); +} + +pub fn getAllErrorsAlloc(self: *Module) !AllErrors { + var arena = std.heap.ArenaAllocator.init(self.allocator); + errdefer arena.deinit(); + + var errors = std.ArrayList(AllErrors.Message).init(self.allocator); + defer errors.deinit(); + + { + var it = self.failed_files.iterator(); + while (it.next()) |kv| { + const scope = kv.key; + const err_msg = kv.value; + const source = scope.source.bytes; + try AllErrors.add(&arena, &errors, scope.sub_file_path, source, err_msg.*); + } + } + { + var it = self.failed_decls.iterator(); + while (it.next()) |kv| { + const decl = kv.key; + const err_msg = kv.value; + const source = decl.scope.source.bytes; + try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); + } + } + { + var it = self.failed_exports.iterator(); + while (it.next()) |kv| { + const decl = kv.key.owner_decl; + const err_msg = kv.value; + const source = decl.scope.source.bytes; + try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); + } + } + + if (self.link_error_flags.no_entry_point_found) { + try errors.append(.{ + .src_path = self.root_pkg.root_src_path, + .line = 0, + .column = 0, + .byte_offset = 0, + .msg = try std.fmt.allocPrint(&arena.allocator, "no entry point found", .{}), + }); + } + + assert(errors.items.len == self.totalErrorCount()); + + return AllErrors{ + .arena = arena.state, + .list = try arena.allocator.dupe(AllErrors.Message, errors.items), + }; +} + +const InnerError = error{ OutOfMemory, AnalysisFail }; + +pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void { + while (self.work_queue.readItem()) |work_item| switch (work_item) { + .codegen_decl => |decl| switch (decl.analysis) { + .initial_in_progress, + .repeat_in_progress, + => unreachable, + + .initial_sema_failure, + .repeat_sema_failure, + .codegen_failure, + .initial_dependency_failure, + .repeat_dependency_failure, + => continue, + + .complete, .codegen_failure_retryable => { + if (decl.typed_value.most_recent.typed_value.val.cast(Value.Payload.Function)) |payload| { + switch (payload.func.analysis) { + .queued => self.analyzeFnBody(decl, payload.func) catch |err| switch (err) { + error.AnalysisFail => { + if (payload.func.analysis == .queued) { + payload.func.analysis = .dependency_failure; + } + continue; + }, + else => |e| return e, + }, + .in_progress => unreachable, + .sema_failure, .dependency_failure => continue, + .success => {}, + } + } + + assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()); + + self.bin_file.updateDecl(self, decl) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => { + decl.analysis = .repeat_dependency_failure; + }, + else => { + try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + self.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create( + self.allocator, + decl.src, + "unable to codegen: {}", + .{@errorName(err)}, + )); + decl.analysis = .codegen_failure_retryable; + }, + }; + }, + }, + }; +} + +fn getSrcModule(self: *Module, root_scope: *Scope.ZIRModule) !*zir.Module { + switch (root_scope.status) { + .never_loaded, .unloaded_success => { + try 
self.failed_files.ensureCapacity(self.failed_files.size + 1); + + var keep_source = false; + const source = try self.root_pkg.root_src_dir.readFileAllocOptions( + self.allocator, + self.root_pkg.root_src_path, + std.math.maxInt(u32), + 1, + 0, + ); + defer if (!keep_source) self.allocator.free(source); + + var keep_zir_module = false; + const zir_module = try self.allocator.create(zir.Module); + defer if (!keep_zir_module) self.allocator.destroy(zir_module); + + zir_module.* = try zir.parse(self.allocator, source); + defer if (!keep_zir_module) zir_module.deinit(self.allocator); + + if (zir_module.error_msg) |src_err_msg| { + self.failed_files.putAssumeCapacityNoClobber( + root_scope, + try ErrorMsg.create(self.allocator, src_err_msg.byte_offset, "{}", .{src_err_msg.msg}), + ); + root_scope.status = .loaded_parse_failure; + root_scope.source = .{ .bytes = source }; + keep_source = true; + return error.AnalysisFail; + } + + root_scope.status = .loaded_success; + root_scope.source = .{ .bytes = source }; + keep_source = true; + root_scope.contents = .{ .module = zir_module }; + keep_zir_module = true; + + return zir_module; + }, + + .unloaded_parse_failure, + .unloaded_sema_failure, + .loaded_parse_failure, + .loaded_sema_failure, + => return error.AnalysisFail, + .loaded_success => return root_scope.contents.module, + } +} + +fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void { + // TODO use the cache to identify, from the modified source files, the decls which have + // changed based on the span of memory that represents the decl in the re-parsed source file. + // Use the cached dependency graph to recursively determine the set of decls which need + // regeneration. + // Here we simulate adding a source file which was previously not part of the compilation, + // which means scanning the decls looking for exports. + // TODO also identify decls that need to be deleted. + switch (root_scope.status) { + .never_loaded => { + const src_module = try self.getSrcModule(root_scope); + + // Here we ensure enough queue capacity to store all the decls, so that later we can use + // appendAssumeCapacity. + try self.work_queue.ensureUnusedCapacity(src_module.decls.len); + + for (src_module.decls) |decl| { + if (decl.cast(zir.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, + + .unloaded_parse_failure, + .unloaded_sema_failure, + .loaded_parse_failure, + .loaded_sema_failure, + .loaded_success, + .unloaded_success, + => { + const src_module = try self.getSrcModule(root_scope); + + // Look for changed decls. 
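            // Change detection is per-decl: `contents_hash` remembers a hash
            // of the source bytes the Decl was last analyzed from, and the
            // loop below rehashes `src_decl.contents`, re-resolving the Decl
            // whenever the two differ.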
+ for (src_module.decls) |src_decl| { + const name_hash = Decl.hashSimpleName(src_decl.name); + if (self.decl_table.get(name_hash)) |kv| { + const decl = kv.value; + const new_contents_hash = Decl.hashSimpleName(src_decl.contents); + if (!mem.eql(u8, &new_contents_hash, &decl.contents_hash)) { + // TODO recursive dependency management + std.debug.warn("noticed that '{}' changed\n", .{src_decl.name}); + self.decl_table.removeAssertDiscard(name_hash); + const saved_link = decl.link; + decl.destroy(self.allocator); + if (self.export_owners.getValue(decl)) |exports| { + @panic("TODO handle updating a decl that does an export"); + } + const new_decl = self.resolveDecl( + &root_scope.base, + src_decl, + saved_link, + ) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => continue, + }; + if (self.decl_exports.remove(decl)) |entry| { + self.decl_exports.putAssumeCapacityNoClobber(new_decl, entry.value); + } + } + } else if (src_decl.cast(zir.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, + } +} + +fn analyzeFnBody(self: *Module, decl: *Decl, func: *Fn) !void { + // Use the Decl's arena for function memory. + var arena = decl.typed_value.most_recent.arena.?.promote(self.allocator); + defer decl.typed_value.most_recent.arena.?.* = arena.state; + var analysis: Fn.Analysis = .{ + .inner_block = .{ + .func = func, + .decl = decl, + .instructions = .{}, + .arena = &arena.allocator, + }, + .needed_inst_capacity = 0, + .inst_table = std.AutoHashMap(*zir.Inst, *Inst).init(self.allocator), + }; + defer analysis.inner_block.instructions.deinit(self.allocator); + defer analysis.inst_table.deinit(); + + const fn_inst = func.analysis.queued; + func.analysis = .{ .in_progress = &analysis }; + + try self.analyzeBody(&analysis.inner_block.base, fn_inst.positionals.body); + + func.analysis = .{ + .success = .{ + .instructions = try arena.allocator.dupe(*Inst, analysis.inner_block.instructions.items), + }, + }; +} + +fn resolveDecl( + self: *Module, + scope: *Scope, + old_inst: *zir.Inst, + bin_file_link: link.ElfFile.Decl, +) InnerError!*Decl { + const hash = Decl.hashSimpleName(old_inst.name); + if (self.decl_table.get(hash)) |kv| { + return kv.value; + } else { + const new_decl = blk: { + try self.decl_table.ensureCapacity(self.decl_table.size + 1); + const new_decl = try self.allocator.create(Decl); + errdefer self.allocator.destroy(new_decl); + const name = try mem.dupeZ(self.allocator, u8, old_inst.name); + errdefer self.allocator.free(name); + new_decl.* = .{ + .name = name, + .scope = scope.namespace(), + .src = old_inst.src, + .typed_value = .{ .never_succeeded = {} }, + .analysis = .initial_in_progress, + .contents_hash = Decl.hashSimpleName(old_inst.contents), + .link = bin_file_link, + }; + self.decl_table.putAssumeCapacityNoClobber(hash, new_decl); + break :blk new_decl; + }; + + var decl_scope: Scope.DeclAnalysis = .{ + .decl = new_decl, + .arena = std.heap.ArenaAllocator.init(self.allocator), + }; + errdefer decl_scope.arena.deinit(); + + const typed_value = self.analyzeInstConst(&decl_scope.base, old_inst) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => { + switch (new_decl.analysis) { + .initial_in_progress => new_decl.analysis = .initial_dependency_failure, + .repeat_in_progress => new_decl.analysis = .repeat_dependency_failure, + else => {}, + } + return error.AnalysisFail; + }, + }; + const arena_state = try 
decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); + + const has_codegen_bits = typed_value.ty.hasCodeGenBits(); + if (has_codegen_bits) { + // We don't fully codegen the decl until later, but we do need to reserve a global + // offset table index for it. This allows us to codegen decls out of dependency order, + // increasing how many computations can be done in parallel. + try self.bin_file.allocateDeclIndexes(new_decl); + } + + arena_state.* = decl_scope.arena.state; + + new_decl.typed_value = .{ + .most_recent = .{ + .typed_value = typed_value, + .arena = arena_state, + }, + }; + new_decl.analysis = .complete; + if (has_codegen_bits) { + // We ensureCapacity when scanning for decls. + self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); + } + return new_decl; + } +} + +fn resolveCompleteDecl(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!*Decl { + const decl = try self.resolveDecl(scope, old_inst, link.ElfFile.Decl.empty); + switch (decl.analysis) { + .initial_in_progress => unreachable, + .repeat_in_progress => unreachable, + .initial_dependency_failure, + .repeat_dependency_failure, + .initial_sema_failure, + .repeat_sema_failure, + .codegen_failure, + .codegen_failure_retryable, + => return error.AnalysisFail, + + .complete => return decl, + } +} + +fn resolveInst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!*Inst { + if (scope.cast(Scope.Block)) |block| { + if (block.func.analysis.in_progress.inst_table.get(old_inst)) |kv| { + return kv.value; + } + } + + const decl = try self.resolveCompleteDecl(scope, old_inst); + const decl_ref = try self.analyzeDeclRef(scope, old_inst.src, decl); + return self.analyzeDeref(scope, old_inst.src, decl_ref, old_inst.src); +} + +fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { + return scope.cast(Scope.Block) orelse + return self.fail(scope, src, "instruction illegal outside function body", .{}); +} + +fn resolveInstConst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!TypedValue { + const new_inst = try self.resolveInst(scope, old_inst); + const val = try self.resolveConstValue(scope, new_inst); + return TypedValue{ + .ty = new_inst.ty, + .val = val, + }; +} + +fn resolveConstValue(self: *Module, scope: *Scope, base: *Inst) !Value { + return (try self.resolveDefinedValue(scope, base)) orelse + return self.fail(scope, base.src, "unable to resolve comptime value", .{}); +} + +fn resolveDefinedValue(self: *Module, scope: *Scope, base: *Inst) !?Value { + if (base.value()) |val| { + if (val.isUndef()) { + return self.fail(scope, base.src, "use of undefined value here causes undefined behavior", .{}); + } + return val; + } + return null; +} + +fn resolveConstString(self: *Module, scope: *Scope, old_inst: *zir.Inst) ![]u8 { + const new_inst = try self.resolveInst(scope, old_inst); + const wanted_type = Type.initTag(.const_slice_u8); + const coerced_inst = try self.coerce(scope, wanted_type, new_inst); + const val = try self.resolveConstValue(scope, coerced_inst); + return val.toAllocatedBytes(scope.arena()); +} + +fn resolveType(self: *Module, scope: *Scope, old_inst: *zir.Inst) !Type { + const new_inst = try self.resolveInst(scope, old_inst); + const wanted_type = Type.initTag(.@"type"); + const coerced_inst = try self.coerce(scope, wanted_type, new_inst); + const val = try self.resolveConstValue(scope, coerced_inst); + return val.toType(); +} + +fn analyzeExport(self: *Module, scope: *Scope, export_inst: *zir.Inst.Export) InnerError!void { + 
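    // The two tables updated below split the responsibilities: `export_owners`
    // is keyed by the decl *performing* the export and owns the Export
    // allocations, while `decl_exports` is keyed by the decl *being* exported
    // and holds slices whose elements are freed only via `export_owners`
    // (see `deinit` above).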
try self.decl_exports.ensureCapacity(self.decl_exports.size + 1); + try self.export_owners.ensureCapacity(self.export_owners.size + 1); + const symbol_name = try self.resolveConstString(scope, export_inst.positionals.symbol_name); + const exported_decl = try self.resolveCompleteDecl(scope, export_inst.positionals.value); + const typed_value = exported_decl.typed_value.most_recent.typed_value; + switch (typed_value.ty.zigTypeTag()) { + .Fn => {}, + else => return self.fail( + scope, + export_inst.positionals.value.src, + "unable to export type '{}'", + .{typed_value.ty}, + ), + } + const new_export = try self.allocator.create(Export); + errdefer self.allocator.destroy(new_export); + + const owner_decl = scope.decl(); + + new_export.* = .{ + .options = .{ .name = symbol_name }, + .src = export_inst.base.src, + .link = .{}, + .owner_decl = owner_decl, + .status = .in_progress, + }; + + // Add to export_owners table. + const eo_gop = self.export_owners.getOrPut(owner_decl) catch unreachable; + if (!eo_gop.found_existing) { + eo_gop.kv.value = &[0]*Export{}; + } + eo_gop.kv.value = try self.allocator.realloc(eo_gop.kv.value, eo_gop.kv.value.len + 1); + eo_gop.kv.value[eo_gop.kv.value.len - 1] = new_export; + errdefer eo_gop.kv.value = self.allocator.shrink(eo_gop.kv.value, eo_gop.kv.value.len - 1); + + // Add to exported_decl table. + const de_gop = self.decl_exports.getOrPut(exported_decl) catch unreachable; + if (!de_gop.found_existing) { + de_gop.kv.value = &[0]*Export{}; + } + de_gop.kv.value = try self.allocator.realloc(de_gop.kv.value, de_gop.kv.value.len + 1); + de_gop.kv.value[de_gop.kv.value.len - 1] = new_export; + errdefer de_gop.kv.value = self.allocator.shrink(de_gop.kv.value, de_gop.kv.value.len - 1); + + self.bin_file.updateDeclExports(self, exported_decl, de_gop.kv.value) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + try self.failed_exports.ensureCapacity(self.failed_exports.size + 1); + self.failed_exports.putAssumeCapacityNoClobber(new_export, try ErrorMsg.create( + self.allocator, + export_inst.base.src, + "unable to export: {}", + .{@errorName(err)}, + )); + new_export.status = .failed_retryable; + }, + }; +} + +/// TODO should not need the cast on the last parameter at the callsites +fn addNewInstArgs( + self: *Module, + block: *Scope.Block, + src: usize, + ty: Type, + comptime T: type, + args: Inst.Args(T), +) !*Inst { + const inst = try self.addNewInst(block, src, ty, T); + inst.args = args; + return &inst.base; +} + +fn addNewInst(self: *Module, block: *Scope.Block, src: usize, ty: Type, comptime T: type) !*T { + const inst = try block.arena.create(T); + inst.* = .{ + .base = .{ + .tag = T.base_tag, + .ty = ty, + .src = src, + }, + .args = undefined, + }; + try block.instructions.append(self.allocator, &inst.base); + return inst; +} + +fn constInst(self: *Module, scope: *Scope, src: usize, typed_value: TypedValue) !*Inst { + const const_inst = try scope.arena().create(Inst.Constant); + const_inst.* = .{ + .base = .{ + .tag = Inst.Constant.base_tag, + .ty = typed_value.ty, + .src = src, + }, + .val = typed_value.val, + }; + return &const_inst.base; +} + +fn constStr(self: *Module, scope: *Scope, src: usize, str: []const u8) !*Inst { + const ty_payload = try scope.arena().create(Type.Payload.Array_u8_Sentinel0); + ty_payload.* = .{ .len = str.len }; + + const bytes_payload = try scope.arena().create(Value.Payload.Bytes); + bytes_payload.* = .{ .data = str }; + + return self.constInst(scope, src, .{ + .ty = 
Type.initPayload(&ty_payload.base), + .val = Value.initPayload(&bytes_payload.base), + }); +} + +fn constType(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { + return self.constInst(scope, src, .{ + .ty = Type.initTag(.type), + .val = try ty.toValue(scope.arena()), + }); +} + +fn constVoid(self: *Module, scope: *Scope, src: usize) !*Inst { + return self.constInst(scope, src, .{ + .ty = Type.initTag(.void), + .val = Value.initTag(.the_one_possible_value), + }); +} + +fn constUndef(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { + return self.constInst(scope, src, .{ + .ty = ty, + .val = Value.initTag(.undef), + }); +} + +fn constBool(self: *Module, scope: *Scope, src: usize, v: bool) !*Inst { + return self.constInst(scope, src, .{ + .ty = Type.initTag(.bool), + .val = ([2]Value{ Value.initTag(.bool_false), Value.initTag(.bool_true) })[@boolToInt(v)], + }); +} + +fn constIntUnsigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: u64) !*Inst { + const int_payload = try scope.arena().create(Value.Payload.Int_u64); + int_payload.* = .{ .int = int }; + + return self.constInst(scope, src, .{ + .ty = ty, + .val = Value.initPayload(&int_payload.base), + }); +} + +fn constIntSigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: i64) !*Inst { + const int_payload = try scope.arena().create(Value.Payload.Int_i64); + int_payload.* = .{ .int = int }; + + return self.constInst(scope, src, .{ + .ty = ty, + .val = Value.initPayload(&int_payload.base), + }); +} + +fn constIntBig(self: *Module, scope: *Scope, src: usize, ty: Type, big_int: BigIntConst) !*Inst { + const val_payload = if (big_int.positive) blk: { + if (big_int.to(u64)) |x| { + return self.constIntUnsigned(scope, src, ty, x); + } else |err| switch (err) { + error.NegativeIntoUnsigned => unreachable, + error.TargetTooSmall => {}, // handled below + } + const big_int_payload = try scope.arena().create(Value.Payload.IntBigPositive); + big_int_payload.* = .{ .limbs = big_int.limbs }; + break :blk &big_int_payload.base; + } else blk: { + if (big_int.to(i64)) |x| { + return self.constIntSigned(scope, src, ty, x); + } else |err| switch (err) { + error.NegativeIntoUnsigned => unreachable, + error.TargetTooSmall => {}, // handled below + } + const big_int_payload = try scope.arena().create(Value.Payload.IntBigNegative); + big_int_payload.* = .{ .limbs = big_int.limbs }; + break :blk &big_int_payload.base; + }; + + return self.constInst(scope, src, .{ + .ty = ty, + .val = Value.initPayload(val_payload), + }); +} + +fn analyzeInstConst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!TypedValue { + const new_inst = try self.analyzeInst(scope, old_inst); + return TypedValue{ + .ty = new_inst.ty, + .val = try self.resolveConstValue(scope, new_inst), + }; +} + +fn analyzeInst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!*Inst { + switch (old_inst.tag) { + .breakpoint => return self.analyzeInstBreakpoint(scope, old_inst.cast(zir.Inst.Breakpoint).?), + .call => return self.analyzeInstCall(scope, old_inst.cast(zir.Inst.Call).?), + .declref => return self.analyzeInstDeclRef(scope, old_inst.cast(zir.Inst.DeclRef).?), + .str => { + const bytes = old_inst.cast(zir.Inst.Str).?.positionals.bytes; + // The bytes references memory inside the ZIR module, which can get deallocated + // after semantic analysis is complete. We need the memory to be in the Decl's arena. 
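+ // Duplicating into the scope's arena (ultimately the Decl's arena) keeps the
+ // string alive after the ZIR module is unloaded.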
+ const arena_bytes = try scope.arena().dupe(u8, bytes); + return self.constStr(scope, old_inst.src, arena_bytes); + }, + .int => { + const big_int = old_inst.cast(zir.Inst.Int).?.positionals.int; + return self.constIntBig(scope, old_inst.src, Type.initTag(.comptime_int), big_int); + }, + .ptrtoint => return self.analyzeInstPtrToInt(scope, old_inst.cast(zir.Inst.PtrToInt).?), + .fieldptr => return self.analyzeInstFieldPtr(scope, old_inst.cast(zir.Inst.FieldPtr).?), + .deref => return self.analyzeInstDeref(scope, old_inst.cast(zir.Inst.Deref).?), + .as => return self.analyzeInstAs(scope, old_inst.cast(zir.Inst.As).?), + .@"asm" => return self.analyzeInstAsm(scope, old_inst.cast(zir.Inst.Asm).?), + .@"unreachable" => return self.analyzeInstUnreachable(scope, old_inst.cast(zir.Inst.Unreachable).?), + .@"return" => return self.analyzeInstRet(scope, old_inst.cast(zir.Inst.Return).?), + .@"fn" => return self.analyzeInstFn(scope, old_inst.cast(zir.Inst.Fn).?), + .@"export" => { + try self.analyzeExport(scope, old_inst.cast(zir.Inst.Export).?); + return self.constVoid(scope, old_inst.src); + }, + .primitive => return self.analyzeInstPrimitive(scope, old_inst.cast(zir.Inst.Primitive).?), + .ref => return self.analyzeInstRef(scope, old_inst.cast(zir.Inst.Ref).?), + .fntype => return self.analyzeInstFnType(scope, old_inst.cast(zir.Inst.FnType).?), + .intcast => return self.analyzeInstIntCast(scope, old_inst.cast(zir.Inst.IntCast).?), + .bitcast => return self.analyzeInstBitCast(scope, old_inst.cast(zir.Inst.BitCast).?), + .elemptr => return self.analyzeInstElemPtr(scope, old_inst.cast(zir.Inst.ElemPtr).?), + .add => return self.analyzeInstAdd(scope, old_inst.cast(zir.Inst.Add).?), + .cmp => return self.analyzeInstCmp(scope, old_inst.cast(zir.Inst.Cmp).?), + .condbr => return self.analyzeInstCondBr(scope, old_inst.cast(zir.Inst.CondBr).?), + .isnull => return self.analyzeInstIsNull(scope, old_inst.cast(zir.Inst.IsNull).?), + .isnonnull => return self.analyzeInstIsNonNull(scope, old_inst.cast(zir.Inst.IsNonNull).?), + } +} + +fn analyzeInstBreakpoint(self: *Module, scope: *Scope, inst: *zir.Inst.Breakpoint) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, inst.base.src); + return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, Inst.Args(Inst.Breakpoint){}); +} + +fn analyzeInstRef(self: *Module, scope: *Scope, inst: *zir.Inst.Ref) InnerError!*Inst { + const decl = try self.resolveCompleteDecl(scope, inst.positionals.operand); + return self.analyzeDeclRef(scope, inst.base.src, decl); +} + +fn analyzeInstDeclRef(self: *Module, scope: *Scope, inst: *zir.Inst.DeclRef) InnerError!*Inst { + const decl_name = try self.resolveConstString(scope, inst.positionals.name); + // This will need to get more fleshed out when there are proper structs & namespaces. 
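+ // For now, the name is resolved with a linear scan over the ZIR module's top-level decls.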
+ const zir_module = scope.namespace(); + for (zir_module.contents.module.decls) |src_decl| { + if (mem.eql(u8, src_decl.name, decl_name)) { + const decl = try self.resolveCompleteDecl(scope, src_decl); + return self.analyzeDeclRef(scope, inst.base.src, decl); + } + } + return self.fail(scope, inst.positionals.name.src, "use of undeclared identifier '{}'", .{decl_name}); +} + +fn analyzeDeclRef(self: *Module, scope: *Scope, src: usize, decl: *Decl) InnerError!*Inst { + const decl_tv = try decl.typedValue(); + const ty_payload = try scope.arena().create(Type.Payload.SingleConstPointer); + ty_payload.* = .{ .pointee_type = decl_tv.ty }; + const val_payload = try scope.arena().create(Value.Payload.DeclRef); + val_payload.* = .{ .decl = decl }; + return self.constInst(scope, src, .{ + .ty = Type.initPayload(&ty_payload.base), + .val = Value.initPayload(&val_payload.base), + }); +} + +fn analyzeInstCall(self: *Module, scope: *Scope, inst: *zir.Inst.Call) InnerError!*Inst { + const func = try self.resolveInst(scope, inst.positionals.func); + if (func.ty.zigTypeTag() != .Fn) + return self.fail(scope, inst.positionals.func.src, "type '{}' not a function", .{func.ty}); + + const cc = func.ty.fnCallingConvention(); + if (cc == .Naked) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "unable to call function with naked calling convention", + .{}, + ); + } + const call_params_len = inst.positionals.args.len; + const fn_params_len = func.ty.fnParamLen(); + if (func.ty.fnIsVarArgs()) { + if (call_params_len < fn_params_len) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "expected at least {} arguments, found {}", + .{ fn_params_len, call_params_len }, + ); + } + return self.fail(scope, inst.base.src, "TODO implement support for calling var args functions", .{}); + } else if (fn_params_len != call_params_len) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "expected {} arguments, found {}", + .{ fn_params_len, call_params_len }, + ); + } + + if (inst.kw_args.modifier == .compile_time) { + return self.fail(scope, inst.base.src, "TODO implement comptime function calls", .{}); + } + if (inst.kw_args.modifier != .auto) { + return self.fail(scope, inst.base.src, "TODO implement call with modifier {}", .{inst.kw_args.modifier}); + } + + // TODO handle function calls of generic functions + + const fn_param_types = try self.allocator.alloc(Type, fn_params_len); + defer self.allocator.free(fn_param_types); + func.ty.fnParamTypes(fn_param_types); + + const casted_args = try scope.arena().alloc(*Inst, fn_params_len); + for (inst.positionals.args) |src_arg, i| { + const uncasted_arg = try self.resolveInst(scope, src_arg); + casted_args[i] = try self.coerce(scope, fn_param_types[i], uncasted_arg); + } + + const b = try self.requireRuntimeBlock(scope, inst.base.src); + return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Call, Inst.Args(Inst.Call){ + .func = func, + .args = casted_args, + }); +} + +fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *zir.Inst.Fn) InnerError!*Inst { + const fn_type = try self.resolveType(scope, fn_inst.positionals.fn_type); + const new_func = try scope.arena().create(Fn); + new_func.* = .{ + .fn_type = fn_type, + .analysis = .{ .queued = fn_inst }, + }; + const fn_payload = try scope.arena().create(Value.Payload.Function); + fn_payload.* = .{ .func = new_func }; + return self.constInst(scope, 
fn_inst.base.src, .{ + .ty = fn_type, + .val = Value.initPayload(&fn_payload.base), + }); +} + +fn analyzeInstFnType(self: *Module, scope: *Scope, fntype: *zir.Inst.FnType) InnerError!*Inst { + const return_type = try self.resolveType(scope, fntype.positionals.return_type); + + if (return_type.zigTypeTag() == .NoReturn and + fntype.positionals.param_types.len == 0 and + fntype.kw_args.cc == .Unspecified) + { + return self.constType(scope, fntype.base.src, Type.initTag(.fn_noreturn_no_args)); + } + + if (return_type.zigTypeTag() == .NoReturn and + fntype.positionals.param_types.len == 0 and + fntype.kw_args.cc == .Naked) + { + return self.constType(scope, fntype.base.src, Type.initTag(.fn_naked_noreturn_no_args)); + } + + if (return_type.zigTypeTag() == .Void and + fntype.positionals.param_types.len == 0 and + fntype.kw_args.cc == .C) + { + return self.constType(scope, fntype.base.src, Type.initTag(.fn_ccc_void_no_args)); + } + + return self.fail(scope, fntype.base.src, "TODO implement fntype instruction more", .{}); +} + +fn analyzeInstPrimitive(self: *Module, scope: *Scope, primitive: *zir.Inst.Primitive) InnerError!*Inst { + return self.constType(scope, primitive.base.src, primitive.positionals.tag.toType()); +} + +fn analyzeInstAs(self: *Module, scope: *Scope, as: *zir.Inst.As) InnerError!*Inst { + const dest_type = try self.resolveType(scope, as.positionals.dest_type); + const new_inst = try self.resolveInst(scope, as.positionals.value); + return self.coerce(scope, dest_type, new_inst); +} + +fn analyzeInstPtrToInt(self: *Module, scope: *Scope, ptrtoint: *zir.Inst.PtrToInt) InnerError!*Inst { + const ptr = try self.resolveInst(scope, ptrtoint.positionals.ptr); + if (ptr.ty.zigTypeTag() != .Pointer) { + return self.fail(scope, ptrtoint.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}); + } + // TODO handle known-pointer-address + const b = try self.requireRuntimeBlock(scope, ptrtoint.base.src); + const ty = Type.initTag(.usize); + return self.addNewInstArgs(b, ptrtoint.base.src, ty, Inst.PtrToInt, Inst.Args(Inst.PtrToInt){ .ptr = ptr }); +} + +fn analyzeInstFieldPtr(self: *Module, scope: *Scope, fieldptr: *zir.Inst.FieldPtr) InnerError!*Inst { + const object_ptr = try self.resolveInst(scope, fieldptr.positionals.object_ptr); + const field_name = try self.resolveConstString(scope, fieldptr.positionals.field_name); + + const elem_ty = switch (object_ptr.ty.zigTypeTag()) { + .Pointer => object_ptr.ty.elemType(), + else => return self.fail(scope, fieldptr.positionals.object_ptr.src, "expected pointer, found '{}'", .{object_ptr.ty}), + }; + switch (elem_ty.zigTypeTag()) { + .Array => { + if (mem.eql(u8, field_name, "len")) { + const len_payload = try scope.arena().create(Value.Payload.Int_u64); + len_payload.* = .{ .int = elem_ty.arrayLen() }; + + const ref_payload = try scope.arena().create(Value.Payload.RefVal); + ref_payload.* = .{ .val = Value.initPayload(&len_payload.base) }; + + return self.constInst(scope, fieldptr.base.src, .{ + .ty = Type.initTag(.single_const_pointer_to_comptime_int), + .val = Value.initPayload(&ref_payload.base), + }); + } else { + return self.fail( + scope, + fieldptr.positionals.field_name.src, + "no member named '{}' in '{}'", + .{ field_name, elem_ty }, + ); + } + }, + else => return self.fail(scope, fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}), + } +} + +fn analyzeInstIntCast(self: *Module, scope: *Scope, intcast: *zir.Inst.IntCast) InnerError!*Inst { + const dest_type = try self.resolveType(scope, 
intcast.positionals.dest_type); + const new_inst = try self.resolveInst(scope, intcast.positionals.value); + + const dest_is_comptime_int = switch (dest_type.zigTypeTag()) { + .ComptimeInt => true, + .Int => false, + else => return self.fail( + scope, + intcast.positionals.dest_type.src, + "expected integer type, found '{}'", + .{ + dest_type, + }, + ), + }; + + switch (new_inst.ty.zigTypeTag()) { + .ComptimeInt, .Int => {}, + else => return self.fail( + scope, + intcast.positionals.value.src, + "expected integer type, found '{}'", + .{new_inst.ty}, + ), + } + + if (dest_is_comptime_int or new_inst.value() != null) { + return self.coerce(scope, dest_type, new_inst); + } + + return self.fail(scope, intcast.base.src, "TODO implement analyze widen or shorten int", .{}); +} + +fn analyzeInstBitCast(self: *Module, scope: *Scope, inst: *zir.Inst.BitCast) InnerError!*Inst { + const dest_type = try self.resolveType(scope, inst.positionals.dest_type); + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.bitcast(scope, dest_type, operand); +} + +fn analyzeInstElemPtr(self: *Module, scope: *Scope, inst: *zir.Inst.ElemPtr) InnerError!*Inst { + const array_ptr = try self.resolveInst(scope, inst.positionals.array_ptr); + const uncasted_index = try self.resolveInst(scope, inst.positionals.index); + const elem_index = try self.coerce(scope, Type.initTag(.usize), uncasted_index); + + if (array_ptr.ty.isSinglePointer() and array_ptr.ty.elemType().zigTypeTag() == .Array) { + if (array_ptr.value()) |array_ptr_val| { + if (elem_index.value()) |index_val| { + // Both array pointer and index are compile-time known. + const index_u64 = index_val.toUnsignedInt(); + // @intCast here because it would have been impossible to construct a value that + // required a larger index. + const elem_ptr = try array_ptr_val.elemPtr(scope.arena(), @intCast(usize, index_u64)); + + const type_payload = try scope.arena().create(Type.Payload.SingleConstPointer); + type_payload.* = .{ .pointee_type = array_ptr.ty.elemType().elemType() }; + + return self.constInst(scope, inst.base.src, .{ + .ty = Type.initPayload(&type_payload.base), + .val = elem_ptr, + }); + } + } + } + + return self.fail(scope, inst.base.src, "TODO implement more analyze elemptr", .{}); +} + +fn analyzeInstAdd(self: *Module, scope: *Scope, inst: *zir.Inst.Add) InnerError!*Inst { + const lhs = try self.resolveInst(scope, inst.positionals.lhs); + const rhs = try self.resolveInst(scope, inst.positionals.rhs); + + if (lhs.ty.zigTypeTag() == .Int and rhs.ty.zigTypeTag() == .Int) { + if (lhs.value()) |lhs_val| { + if (rhs.value()) |rhs_val| { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. 
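+ // (For instance, adding as u64/i64 when both values fit, and only falling back
+ // to BigInt on overflow, would avoid the limb allocation below.)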
+ var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const lhs_bigint = lhs_val.toBigInt(&lhs_space); + const rhs_bigint = rhs_val.toBigInt(&rhs_space); + const limbs = try scope.arena().alloc( + std.math.big.Limb, + std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len) + 1, + ); + var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + result_bigint.add(lhs_bigint, rhs_bigint); + const result_limbs = result_bigint.limbs[0..result_bigint.len]; + + if (!lhs.ty.eql(rhs.ty)) { + return self.fail(scope, inst.base.src, "TODO implement peer type resolution", .{}); + } + + const val_payload = if (result_bigint.positive) blk: { + const val_payload = try scope.arena().create(Value.Payload.IntBigPositive); + val_payload.* = .{ .limbs = result_limbs }; + break :blk &val_payload.base; + } else blk: { + const val_payload = try scope.arena().create(Value.Payload.IntBigNegative); + val_payload.* = .{ .limbs = result_limbs }; + break :blk &val_payload.base; + }; + + return self.constInst(scope, inst.base.src, .{ + .ty = lhs.ty, + .val = Value.initPayload(val_payload), + }); + } + } + } + + return self.fail(scope, inst.base.src, "TODO implement more analyze add", .{}); +} + +fn analyzeInstDeref(self: *Module, scope: *Scope, deref: *zir.Inst.Deref) InnerError!*Inst { + const ptr = try self.resolveInst(scope, deref.positionals.ptr); + return self.analyzeDeref(scope, deref.base.src, ptr, deref.positionals.ptr.src); +} + +fn analyzeDeref(self: *Module, scope: *Scope, src: usize, ptr: *Inst, ptr_src: usize) InnerError!*Inst { + const elem_ty = switch (ptr.ty.zigTypeTag()) { + .Pointer => ptr.ty.elemType(), + else => return self.fail(scope, ptr_src, "expected pointer, found '{}'", .{ptr.ty}), + }; + if (ptr.value()) |val| { + return self.constInst(scope, src, .{ + .ty = elem_ty, + .val = try val.pointerDeref(scope.arena()), + }); + } + + return self.fail(scope, src, "TODO implement runtime deref", .{}); +} + +fn analyzeInstAsm(self: *Module, scope: *Scope, assembly: *zir.Inst.Asm) InnerError!*Inst { + const return_type = try self.resolveType(scope, assembly.positionals.return_type); + const asm_source = try self.resolveConstString(scope, assembly.positionals.asm_source); + const output = if (assembly.kw_args.output) |o| try self.resolveConstString(scope, o) else null; + + const inputs = try scope.arena().alloc([]const u8, assembly.kw_args.inputs.len); + const clobbers = try scope.arena().alloc([]const u8, assembly.kw_args.clobbers.len); + const args = try scope.arena().alloc(*Inst, assembly.kw_args.args.len); + + for (inputs) |*elem, i| { + elem.* = try self.resolveConstString(scope, assembly.kw_args.inputs[i]); + } + for (clobbers) |*elem, i| { + elem.* = try self.resolveConstString(scope, assembly.kw_args.clobbers[i]); + } + for (args) |*elem, i| { + const arg = try self.resolveInst(scope, assembly.kw_args.args[i]); + elem.* = try self.coerce(scope, Type.initTag(.usize), arg); + } + + const b = try self.requireRuntimeBlock(scope, assembly.base.src); + return self.addNewInstArgs(b, assembly.base.src, return_type, Inst.Assembly, Inst.Args(Inst.Assembly){ + .asm_source = asm_source, + .is_volatile = assembly.kw_args.@"volatile", + .output = output, + .inputs = inputs, + .clobbers = clobbers, + .args = args, + }); +} + +fn analyzeInstCmp(self: *Module, scope: *Scope, inst: *zir.Inst.Cmp) InnerError!*Inst { + const lhs = try self.resolveInst(scope, inst.positionals.lhs); + const rhs = try self.resolveInst(scope, 
inst.positionals.rhs); + const op = inst.positionals.op; + + const is_equality_cmp = switch (op) { + .eq, .neq => true, + else => false, + }; + const lhs_ty_tag = lhs.ty.zigTypeTag(); + const rhs_ty_tag = rhs.ty.zigTypeTag(); + if (is_equality_cmp and lhs_ty_tag == .Null and rhs_ty_tag == .Null) { + // null == null, null != null + return self.constBool(scope, inst.base.src, op == .eq); + } else if (is_equality_cmp and + ((lhs_ty_tag == .Null and rhs_ty_tag == .Optional) or + rhs_ty_tag == .Null and lhs_ty_tag == .Optional)) + { + // comparing null with optionals + const opt_operand = if (lhs_ty_tag == .Optional) lhs else rhs; + if (opt_operand.value()) |opt_val| { + const is_null = opt_val.isNull(); + return self.constBool(scope, inst.base.src, if (op == .eq) is_null else !is_null); + } + const b = try self.requireRuntimeBlock(scope, inst.base.src); + switch (op) { + .eq => return self.addNewInstArgs( + b, + inst.base.src, + Type.initTag(.bool), + Inst.IsNull, + Inst.Args(Inst.IsNull){ .operand = opt_operand }, + ), + .neq => return self.addNewInstArgs( + b, + inst.base.src, + Type.initTag(.bool), + Inst.IsNonNull, + Inst.Args(Inst.IsNonNull){ .operand = opt_operand }, + ), + else => unreachable, + } + } else if (is_equality_cmp and + ((lhs_ty_tag == .Null and rhs.ty.isCPtr()) or (rhs_ty_tag == .Null and lhs.ty.isCPtr()))) + { + return self.fail(scope, inst.base.src, "TODO implement C pointer cmp", .{}); + } else if (lhs_ty_tag == .Null or rhs_ty_tag == .Null) { + const non_null_type = if (lhs_ty_tag == .Null) rhs.ty else lhs.ty; + return self.fail(scope, inst.base.src, "comparison of '{}' with null", .{non_null_type}); + } else if (is_equality_cmp and + ((lhs_ty_tag == .EnumLiteral and rhs_ty_tag == .Union) or + (rhs_ty_tag == .EnumLiteral and lhs_ty_tag == .Union))) + { + return self.fail(scope, inst.base.src, "TODO implement equality comparison between a union's tag value and an enum literal", .{}); + } else if (lhs_ty_tag == .ErrorSet and rhs_ty_tag == .ErrorSet) { + if (!is_equality_cmp) { + return self.fail(scope, inst.base.src, "{} operator not allowed for errors", .{@tagName(op)}); + } + return self.fail(scope, inst.base.src, "TODO implement equality comparison between errors", .{}); + } else if (lhs.ty.isNumeric() and rhs.ty.isNumeric()) { + // This operation allows any combination of integer and float types, regardless of the + // signed-ness, comptime-ness, and bit-width. So peer type resolution is incorrect for + // numeric types. 
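+ // For example, comparing a u8 with an i32, or an f64 with a comptime_int, is handled here.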
+ return self.cmpNumeric(scope, inst.base.src, lhs, rhs, op); + } + return self.fail(scope, inst.base.src, "TODO implement more cmp analysis", .{}); +} + +fn analyzeInstIsNull(self: *Module, scope: *Scope, inst: *zir.Inst.IsNull) InnerError!*Inst { + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.analyzeIsNull(scope, inst.base.src, operand, true); +} + +fn analyzeInstIsNonNull(self: *Module, scope: *Scope, inst: *zir.Inst.IsNonNull) InnerError!*Inst { + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.analyzeIsNull(scope, inst.base.src, operand, false); +} + +fn analyzeInstCondBr(self: *Module, scope: *Scope, inst: *zir.Inst.CondBr) InnerError!*Inst { + const uncasted_cond = try self.resolveInst(scope, inst.positionals.condition); + const cond = try self.coerce(scope, Type.initTag(.bool), uncasted_cond); + + if (try self.resolveDefinedValue(scope, cond)) |cond_val| { + const body = if (cond_val.toBool()) &inst.positionals.true_body else &inst.positionals.false_body; + try self.analyzeBody(scope, body.*); + return self.constVoid(scope, inst.base.src); + } + + const parent_block = try self.requireRuntimeBlock(scope, inst.base.src); + + var true_block: Scope.Block = .{ + .func = parent_block.func, + .decl = parent_block.decl, + .instructions = .{}, + .arena = parent_block.arena, + }; + defer true_block.instructions.deinit(self.allocator); + try self.analyzeBody(&true_block.base, inst.positionals.true_body); + + var false_block: Scope.Block = .{ + .func = parent_block.func, + .decl = parent_block.decl, + .instructions = .{}, + .arena = parent_block.arena, + }; + defer false_block.instructions.deinit(self.allocator); + try self.analyzeBody(&false_block.base, inst.positionals.false_body); + + return self.addNewInstArgs(parent_block, inst.base.src, Type.initTag(.void), Inst.CondBr, Inst.Args(Inst.CondBr){ + .condition = cond, + .true_body = .{ .instructions = try scope.arena().dupe(*Inst, true_block.instructions.items) }, + .false_body = .{ .instructions = try scope.arena().dupe(*Inst, false_block.instructions.items) }, + }); +} + +fn wantSafety(self: *Module, scope: *Scope) bool { + return switch (self.optimize_mode) { + .Debug => true, + .ReleaseSafe => true, + .ReleaseFast => false, + .ReleaseSmall => false, + }; +} + +fn analyzeInstUnreachable(self: *Module, scope: *Scope, unreach: *zir.Inst.Unreachable) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, unreach.base.src); + if (self.wantSafety(scope)) { + // TODO Once we have a panic function to call, call it here instead of this. 
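+ // For now, emit a breakpoint so that reaching unreachable code traps in safe build modes.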
+ _ = try self.addNewInstArgs(b, unreach.base.src, Type.initTag(.void), Inst.Breakpoint, {}); + } + return self.addNewInstArgs(b, unreach.base.src, Type.initTag(.noreturn), Inst.Unreach, {}); +} + +fn analyzeInstRet(self: *Module, scope: *Scope, inst: *zir.Inst.Return) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, inst.base.src); + return self.addNewInstArgs(b, inst.base.src, Type.initTag(.noreturn), Inst.Ret, {}); +} + +fn analyzeBody(self: *Module, scope: *Scope, body: zir.Module.Body) !void { + if (scope.cast(Scope.Block)) |b| { + const analysis = b.func.analysis.in_progress; + analysis.needed_inst_capacity += body.instructions.len; + try analysis.inst_table.ensureCapacity(analysis.needed_inst_capacity); + for (body.instructions) |src_inst| { + const new_inst = try self.analyzeInst(scope, src_inst); + analysis.inst_table.putAssumeCapacityNoClobber(src_inst, new_inst); + } + } else { + for (body.instructions) |src_inst| { + _ = try self.analyzeInst(scope, src_inst); + } + } +} + +fn analyzeIsNull( + self: *Module, + scope: *Scope, + src: usize, + operand: *Inst, + invert_logic: bool, +) InnerError!*Inst { + return self.fail(scope, src, "TODO implement analysis of isnull and isnotnull", .{}); +} + +/// Asserts that lhs and rhs types are both numeric. +fn cmpNumeric( + self: *Module, + scope: *Scope, + src: usize, + lhs: *Inst, + rhs: *Inst, + op: std.math.CompareOperator, +) !*Inst { + assert(lhs.ty.isNumeric()); + assert(rhs.ty.isNumeric()); + + const lhs_ty_tag = lhs.ty.zigTypeTag(); + const rhs_ty_tag = rhs.ty.zigTypeTag(); + + if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) { + if (lhs.ty.arrayLen() != rhs.ty.arrayLen()) { + return self.fail(scope, src, "vector length mismatch: {} and {}", .{ + lhs.ty.arrayLen(), + rhs.ty.arrayLen(), + }); + } + return self.fail(scope, src, "TODO implement support for vectors in cmpNumeric", .{}); + } else if (lhs_ty_tag == .Vector or rhs_ty_tag == .Vector) { + return self.fail(scope, src, "mixed scalar and vector operands to comparison operator: '{}' and '{}'", .{ + lhs.ty, + rhs.ty, + }); + } + + if (lhs.value()) |lhs_val| { + if (rhs.value()) |rhs_val| { + return self.constBool(scope, src, Value.compare(lhs_val, op, rhs_val)); + } + } + + // TODO handle comparisons against lazy zero values + // Some values can be compared against zero without being runtime known or without forcing + // a full resolution of their value, for example `@sizeOf(@Frame(function))` is known to + // always be nonzero, and we benefit from not forcing the full evaluation and stack frame layout + // of this function if we don't need to. + + // It must be a runtime comparison. + const b = try self.requireRuntimeBlock(scope, src); + // For floats, emit a float comparison instruction. + const lhs_is_float = switch (lhs_ty_tag) { + .Float, .ComptimeFloat => true, + else => false, + }; + const rhs_is_float = switch (rhs_ty_tag) { + .Float, .ComptimeFloat => true, + else => false, + }; + if (lhs_is_float and rhs_is_float) { + // Implicit cast the smaller one to the larger one. 
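+ // For example, an f32 operand is widened to f64 before being compared against an f64 operand.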
+ const dest_type = x: { + if (lhs_ty_tag == .ComptimeFloat) { + break :x rhs.ty; + } else if (rhs_ty_tag == .ComptimeFloat) { + break :x lhs.ty; + } + if (lhs.ty.floatBits(self.target()) >= rhs.ty.floatBits(self.target())) { + break :x lhs.ty; + } else { + break :x rhs.ty; + } + }; + const casted_lhs = try self.coerce(scope, dest_type, lhs); + const casted_rhs = try self.coerce(scope, dest_type, rhs); + return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){ + .lhs = casted_lhs, + .rhs = casted_rhs, + .op = op, + }); + } + // For mixed unsigned integer sizes, implicit cast both operands to the larger integer. + // For mixed signed and unsigned integers, implicit cast both operands to a signed + // integer with + 1 bit. + // For mixed floats and integers, extract the integer part from the float, cast that to + // a signed integer with mantissa bits + 1, and if there was any non-integral part of the float, + // add/subtract 1. + const lhs_is_signed = if (lhs.value()) |lhs_val| + lhs_val.compareWithZero(.lt) + else + (lhs.ty.isFloat() or lhs.ty.isSignedInt()); + const rhs_is_signed = if (rhs.value()) |rhs_val| + rhs_val.compareWithZero(.lt) + else + (rhs.ty.isFloat() or rhs.ty.isSignedInt()); + const dest_int_is_signed = lhs_is_signed or rhs_is_signed; + + var dest_float_type: ?Type = null; + + var lhs_bits: usize = undefined; + if (lhs.value()) |lhs_val| { + if (lhs_val.isUndef()) + return self.constUndef(scope, src, Type.initTag(.bool)); + const is_unsigned = if (lhs_is_float) x: { + var bigint_space: Value.BigIntSpace = undefined; + var bigint = try lhs_val.toBigInt(&bigint_space).toManaged(self.allocator); + defer bigint.deinit(); + const zcmp = lhs_val.orderAgainstZero(); + if (lhs_val.floatHasFraction()) { + switch (op) { + .eq => return self.constBool(scope, src, false), + .neq => return self.constBool(scope, src, true), + else => {}, + } + if (zcmp == .lt) { + try bigint.addScalar(bigint.toConst(), -1); + } else { + try bigint.addScalar(bigint.toConst(), 1); + } + } + lhs_bits = bigint.toConst().bitCountTwosComp(); + break :x (zcmp != .lt); + } else x: { + lhs_bits = lhs_val.intBitCountTwosComp(); + break :x (lhs_val.orderAgainstZero() != .lt); + }; + lhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); + } else if (lhs_is_float) { + dest_float_type = lhs.ty; + } else { + const int_info = lhs.ty.intInfo(self.target()); + lhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed); + } + + var rhs_bits: usize = undefined; + if (rhs.value()) |rhs_val| { + if (rhs_val.isUndef()) + return self.constUndef(scope, src, Type.initTag(.bool)); + const is_unsigned = if (rhs_is_float) x: { + var bigint_space: Value.BigIntSpace = undefined; + var bigint = try rhs_val.toBigInt(&bigint_space).toManaged(self.allocator); + defer bigint.deinit(); + const zcmp = rhs_val.orderAgainstZero(); + if (rhs_val.floatHasFraction()) { + switch (op) { + .eq => return self.constBool(scope, src, false), + .neq => return self.constBool(scope, src, true), + else => {}, + } + if (zcmp == .lt) { + try bigint.addScalar(bigint.toConst(), -1); + } else { + try bigint.addScalar(bigint.toConst(), 1); + } + } + rhs_bits = bigint.toConst().bitCountTwosComp(); + break :x (zcmp != .lt); + } else x: { + rhs_bits = rhs_val.intBitCountTwosComp(); + break :x (rhs_val.orderAgainstZero() != .lt); + }; + rhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); + } else if (rhs_is_float) { + dest_float_type = rhs.ty; + } else { + const int_info = rhs.ty.intInfo(self.target()); + 
rhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed);
+ }
+
+ const dest_type = if (dest_float_type) |ft| ft else blk: {
+ const max_bits = std.math.max(lhs_bits, rhs_bits);
+ const casted_bits = std.math.cast(u16, max_bits) catch |err| switch (err) {
+ error.Overflow => return self.fail(scope, src, "{} exceeds maximum integer bit count", .{max_bits}),
+ };
+ break :blk try self.makeIntType(scope, dest_int_is_signed, casted_bits);
+ };
+ const casted_lhs = try self.coerce(scope, dest_type, lhs);
+ const casted_rhs = try self.coerce(scope, dest_type, rhs);
+
+ return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){
+ .lhs = casted_lhs,
+ .rhs = casted_rhs,
+ .op = op,
+ });
+}
+
+fn makeIntType(self: *Module, scope: *Scope, signed: bool, bits: u16) !Type {
+ if (signed) {
+ const int_payload = try scope.arena().create(Type.Payload.IntSigned);
+ int_payload.* = .{ .bits = bits };
+ return Type.initPayload(&int_payload.base);
+ } else {
+ const int_payload = try scope.arena().create(Type.Payload.IntUnsigned);
+ int_payload.* = .{ .bits = bits };
+ return Type.initPayload(&int_payload.base);
+ }
+}
+
+fn coerce(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
+ // If the types are the same, we can return the operand.
+ if (dest_type.eql(inst.ty))
+ return inst;
+
+ const in_memory_result = coerceInMemoryAllowed(dest_type, inst.ty);
+ if (in_memory_result == .ok) {
+ return self.bitcast(scope, dest_type, inst);
+ }
+
+ // *[N]T to []T
+ if (inst.ty.isSinglePointer() and dest_type.isSlice() and
+ (!inst.ty.pointerIsConst() or dest_type.pointerIsConst()))
+ {
+ const array_type = inst.ty.elemType();
+ const dst_elem_type = dest_type.elemType();
+ if (array_type.zigTypeTag() == .Array and
+ coerceInMemoryAllowed(dst_elem_type, array_type.elemType()) == .ok)
+ {
+ return self.coerceArrayPtrToSlice(scope, dest_type, inst);
+ }
+ }
+
+ // comptime_int to fixed-width integer
+ if (inst.ty.zigTypeTag() == .ComptimeInt and dest_type.zigTypeTag() == .Int) {
+ // The representation is already correct; we only need to make sure it fits in the destination type.
+ const val = inst.value().?; // comptime_int always has comptime known value
+ if (!val.intFitsInType(dest_type, self.target())) {
+ return self.fail(scope, inst.src, "type {} cannot represent integer value {}", .{ inst.ty, val });
+ }
+ return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val });
+ }
+
+ // integer widening
+ if (inst.ty.zigTypeTag() == .Int and dest_type.zigTypeTag() == .Int) {
+ const src_info = inst.ty.intInfo(self.target());
+ const dst_info = dest_type.intInfo(self.target());
+ if (src_info.signed == dst_info.signed and dst_info.bits >= src_info.bits) {
+ if (inst.value()) |val| {
+ return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val });
+ } else {
+ return self.fail(scope, inst.src, "TODO implement runtime integer widening", .{});
+ }
+ } else {
+ return self.fail(scope, inst.src, "TODO implement more int widening {} to {}", .{ inst.ty, dest_type });
+ }
+ }
+
+ return self.fail(scope, inst.src, "TODO implement type coercion from {} to {}", .{ inst.ty, dest_type });
+}
+
+fn bitcast(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
+ if (inst.value()) |val| {
+ // Keep the comptime Value representation; take the new type.
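+ // For example, a comptime-known u32 value bitcast to i32 reuses the same Value under the new type.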
+ return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); + } + // TODO validate the type size and other compile errors + const b = try self.requireRuntimeBlock(scope, inst.src); + return self.addNewInstArgs(b, inst.src, dest_type, Inst.BitCast, Inst.Args(Inst.BitCast){ .operand = inst }); +} + +fn coerceArrayPtrToSlice(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst { + if (inst.value()) |val| { + // The comptime Value representation is compatible with both types. + return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); + } + return self.fail(scope, inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{}); +} + +fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError { + @setCold(true); + try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + try self.failed_files.ensureCapacity(self.failed_files.size + 1); + const err_msg = try ErrorMsg.create(self.allocator, src, format, args); + switch (scope.tag) { + .decl => { + const decl = scope.cast(Scope.DeclAnalysis).?.decl; + switch (decl.analysis) { + .initial_in_progress => decl.analysis = .initial_sema_failure, + .repeat_in_progress => decl.analysis = .repeat_sema_failure, + else => unreachable, + } + self.failed_decls.putAssumeCapacityNoClobber(decl, err_msg); + }, + .block => { + const block = scope.cast(Scope.Block).?; + block.func.analysis = .sema_failure; + self.failed_decls.putAssumeCapacityNoClobber(block.decl, err_msg); + }, + .zir_module => { + const zir_module = scope.cast(Scope.ZIRModule).?; + zir_module.status = .loaded_sema_failure; + self.failed_files.putAssumeCapacityNoClobber(zir_module, err_msg); + }, + } + return error.AnalysisFail; +} + +const InMemoryCoercionResult = enum { + ok, + no_match, +}; + +fn coerceInMemoryAllowed(dest_type: Type, src_type: Type) InMemoryCoercionResult { + if (dest_type.eql(src_type)) + return .ok; + + // TODO: implement more of this function + + return .no_match; +} + +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, + + pub fn create(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !*ErrorMsg { + const self = try allocator.create(ErrorMsg); + errdefer allocator.destroy(self); + self.* = try init(allocator, byte_offset, format, args); + return self; + } + + /// Assumes the ErrorMsg struct and msg were both allocated with allocator. + pub fn destroy(self: *ErrorMsg, allocator: *Allocator) void { + self.deinit(allocator); + allocator.destroy(self); + } + + pub fn init(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !ErrorMsg { + return ErrorMsg{ + .byte_offset = byte_offset, + .msg = try std.fmt.allocPrint(allocator, format, args), + }; + } + + pub fn deinit(self: *ErrorMsg, allocator: *Allocator) void { + allocator.free(self.msg); + self.* = undefined; + } +}; diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6ebf68df90..ae1489136e 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -6,6 +6,8 @@ const Type = @import("type.zig").Type; const Value = @import("value.zig").Value; const TypedValue = @import("TypedValue.zig"); const link = @import("link.zig"); +const Module = @import("Module.zig"); +const ErrorMsg = Module.ErrorMsg; const Target = std.Target; const Allocator = mem.Allocator; @@ -14,7 +16,7 @@ pub const Result = union(enum) { appended: void, /// The value is available externally, `code` is unused. 
externally_managed: []const u8, - fail: *ir.ErrorMsg, + fail: *Module.ErrorMsg, }; pub fn generateSymbol( @@ -77,7 +79,7 @@ pub fn generateSymbol( } } return Result{ - .fail = try ir.ErrorMsg.create( + .fail = try ErrorMsg.create( bin_file.allocator, src, "TODO implement generateSymbol for more kinds of arrays", @@ -107,7 +109,7 @@ pub fn generateSymbol( return Result{ .appended = {} }; } return Result{ - .fail = try ir.ErrorMsg.create( + .fail = try ErrorMsg.create( bin_file.allocator, src, "TODO implement generateSymbol for pointer {}", @@ -123,7 +125,7 @@ pub fn generateSymbol( return Result{ .appended = {} }; } return Result{ - .fail = try ir.ErrorMsg.create( + .fail = try ErrorMsg.create( bin_file.allocator, src, "TODO implement generateSymbol for int type '{}'", @@ -133,7 +135,7 @@ pub fn generateSymbol( }, else => |t| { return Result{ - .fail = try ir.ErrorMsg.create( + .fail = try ErrorMsg.create( bin_file.allocator, src, "TODO implement generateSymbol for type '{}'", @@ -147,10 +149,10 @@ pub fn generateSymbol( const Function = struct { bin_file: *link.ElfFile, target: *const std.Target, - mod_fn: *const ir.Module.Fn, + mod_fn: *const Module.Fn, code: *std.ArrayList(u8), inst_table: std.AutoHashMap(*ir.Inst, MCValue), - err_msg: ?*ir.ErrorMsg, + err_msg: ?*ErrorMsg, const MCValue = union(enum) { none, @@ -570,7 +572,7 @@ const Function = struct { fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } { @setCold(true); assert(self.err_msg == null); - self.err_msg = try ir.ErrorMsg.create(self.code.allocator, src, format, args); + self.err_msg = try ErrorMsg.create(self.code.allocator, src, format, args); return error.CodegenFail; } }; diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 2052479dae..330b1c4135 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -1,20 +1,9 @@ const std = @import("std"); -const mem = std.mem; -const Allocator = std.mem.Allocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; const Value = @import("value.zig").Value; const Type = @import("type.zig").Type; -const TypedValue = @import("TypedValue.zig"); -const assert = std.debug.assert; -const BigIntConst = std.math.big.int.Const; -const BigIntMutable = std.math.big.int.Mutable; -const Target = std.Target; -const Package = @import("Package.zig"); -const link = @import("link.zig"); +const Module = @import("Module.zig"); -pub const text = @import("ir/text.zig"); - -/// These are in-memory, analyzed instructions. See `text.Inst` for the representation +/// These are in-memory, analyzed instructions. See `zir.Inst` for the representation /// of instructions that correspond to the ZIR text format. /// This struct owns the `Value` and `Type` memory. When the struct is deallocated, /// so are the `Value` and `Type`. The value of a constant must be copied into @@ -166,2006 +155,3 @@ pub const Inst = struct { args: void, }; }; - -pub const Module = struct { - /// General-purpose allocator. - allocator: *Allocator, - /// Module owns this resource. - root_pkg: *Package, - /// Module owns this resource. - root_scope: *Scope.ZIRModule, - /// Pointer to externally managed resource. - bin_file: *link.ElfFile, - /// It's rare for a decl to be exported, so we save memory by having a sparse map of - /// Decl pointers to details about them being exported. - /// The Export memory is owned by the `export_owners` table; the slice itself is owned by this table. 
- decl_exports: std.AutoHashMap(*Decl, []*Export), - /// This models the Decls that perform exports, so that `decl_exports` can be updated when a Decl - /// is modified. Note that the key of this table is not the Decl being exported, but the Decl that - /// is performing the export of another Decl. - /// This table owns the Export memory. - export_owners: std.AutoHashMap(*Decl, []*Export), - /// Maps fully qualified namespaced names to the Decl struct for them. - decl_table: std.AutoHashMap(Decl.Hash, *Decl), - - optimize_mode: std.builtin.Mode, - link_error_flags: link.ElfFile.ErrorFlags = link.ElfFile.ErrorFlags{}, - - work_queue: std.fifo.LinearFifo(WorkItem, .Dynamic), - - /// We optimize memory usage for a compilation with no compile errors by storing the - /// error messages and mapping outside of `Decl`. - /// The ErrorMsg memory is owned by the decl, using Module's allocator. - /// Note that a Decl can succeed but the Fn it represents can fail. In this case, - /// a Decl can have a failed_decls entry but have analysis status of success. - failed_decls: std.AutoHashMap(*Decl, *ErrorMsg), - /// Using a map here for consistency with the other fields here. - /// The ErrorMsg memory is owned by the `Scope.ZIRModule`, using Module's allocator. - failed_files: std.AutoHashMap(*Scope.ZIRModule, *ErrorMsg), - /// Using a map here for consistency with the other fields here. - /// The ErrorMsg memory is owned by the `Export`, using Module's allocator. - failed_exports: std.AutoHashMap(*Export, *ErrorMsg), - - pub const WorkItem = union(enum) { - /// Write the machine code for a Decl to the output file. - codegen_decl: *Decl, - }; - - pub const Export = struct { - options: std.builtin.ExportOptions, - /// Byte offset into the file that contains the export directive. - src: usize, - /// Represents the position of the export, if any, in the output file. - link: link.ElfFile.Export, - /// The Decl that performs the export. Note that this is *not* the Decl being exported. - owner_decl: *Decl, - status: enum { - in_progress, - failed, - /// Indicates that the failure was due to a temporary issue, such as an I/O error - /// when writing to the output file. Retrying the export may succeed. - failed_retryable, - complete, - }, - }; - - pub const Decl = struct { - /// This name is relative to the containing namespace of the decl. It uses a null-termination - /// to save bytes, since there can be a lot of decls in a compilation. The null byte is not allowed - /// in symbol names, because executable file formats use null-terminated strings for symbol names. - /// All Decls have names, even values that are not bound to a zig namespace. This is necessary for - /// mapping them to an address in the output file. - /// Memory owned by this decl, using Module's allocator. - name: [*:0]const u8, - /// The direct parent container of the Decl. This field will need to get more fleshed out when - /// self-hosted supports proper struct types and Zig AST => ZIR. - /// Reference to externally owned memory. - scope: *Scope.ZIRModule, - /// Byte offset into the source file that contains this declaration. - /// This is the base offset that src offsets within this Decl are relative to. - src: usize, - /// The most recent value of the Decl after a successful semantic analysis. - /// The tag for this union is determined by the tag value of the analysis field. - typed_value: union { - never_succeeded: void, - most_recent: TypedValue.Managed, - }, - /// Represents the "shallow" analysis status. 
For example, for decls that are functions, - /// the function type is analyzed with this set to `in_progress`, however, the semantic - /// analysis of the function body is performed with this value set to `success`. Functions - /// have their own analysis status field. - analysis: enum { - initial_in_progress, - /// This Decl might be OK but it depends on another one which did not successfully complete - /// semantic analysis. This Decl never had a value computed. - initial_dependency_failure, - /// Semantic analysis failure. This Decl never had a value computed. - /// There will be a corresponding ErrorMsg in Module.failed_decls. - initial_sema_failure, - /// In this case the `typed_value.most_recent` can still be accessed. - /// There will be a corresponding ErrorMsg in Module.failed_decls. - codegen_failure, - /// In this case the `typed_value.most_recent` can still be accessed. - /// There will be a corresponding ErrorMsg in Module.failed_decls. - /// This indicates the failure was something like running out of disk space, - /// and attempting codegen again may succeed. - codegen_failure_retryable, - /// This Decl might be OK but it depends on another one which did not successfully complete - /// semantic analysis. There is a most recent value available. - repeat_dependency_failure, - /// Semantic anlaysis failure, but the `typed_value.most_recent` can be accessed. - /// There will be a corresponding ErrorMsg in Module.failed_decls. - repeat_sema_failure, - /// Completed successfully before; the `typed_value.most_recent` can be accessed, and - /// new semantic analysis is in progress. - repeat_in_progress, - /// Everything is done and updated. - complete, - }, - - /// Represents the position of the code in the output file. - /// This is populated regardless of semantic analysis and code generation. - link: link.ElfFile.Decl = link.ElfFile.Decl.empty, - - /// The shallow set of other decls whose typed_value could possibly change if this Decl's - /// typed_value is modified. - /// TODO look into using a lightweight map/set data structure rather than a linear array. - dependants: ArrayListUnmanaged(*Decl) = ArrayListUnmanaged(*Decl){}, - - contents_hash: Hash, - - pub fn destroy(self: *Decl, allocator: *Allocator) void { - allocator.free(mem.spanZ(self.name)); - if (self.typedValueManaged()) |tvm| { - tvm.deinit(allocator); - } - allocator.destroy(self); - } - - pub const Hash = [16]u8; - - /// If the name is small enough, it is used directly as the hash. - /// If it is long, blake3 hash is computed. - pub fn hashSimpleName(name: []const u8) Hash { - var out: Hash = undefined; - if (name.len <= Hash.len) { - mem.copy(u8, &out, name); - mem.set(u8, out[name.len..], 0); - } else { - std.crypto.Blake3.hash(name, &out); - } - return out; - } - - /// Must generate unique bytes with no collisions with other decls. - /// The point of hashing here is only to limit the number of bytes of - /// the unique identifier to a fixed size (16 bytes). - pub fn fullyQualifiedNameHash(self: Decl) Hash { - // Right now we only have ZIRModule as the source. So this is simply the - // relative name of the decl. 
- return hashSimpleName(mem.spanZ(u8, self.name)); - } - - pub fn typedValue(self: *Decl) error{AnalysisFail}!TypedValue { - const tvm = self.typedValueManaged() orelse return error.AnalysisFail; - return tvm.typed_value; - } - - pub fn value(self: *Decl) error{AnalysisFail}!Value { - return (try self.typedValue()).val; - } - - pub fn dump(self: *Decl) void { - const loc = std.zig.findLineColumn(self.scope.source.bytes, self.src); - std.debug.warn("{}:{}:{} name={} status={}", .{ - self.scope.sub_file_path, - loc.line + 1, - loc.column + 1, - mem.spanZ(self.name), - @tagName(self.analysis), - }); - if (self.typedValueManaged()) |tvm| { - std.debug.warn(" ty={} val={}", .{ tvm.typed_value.ty, tvm.typed_value.val }); - } - std.debug.warn("\n", .{}); - } - - fn typedValueManaged(self: *Decl) ?*TypedValue.Managed { - switch (self.analysis) { - .initial_in_progress, - .initial_dependency_failure, - .initial_sema_failure, - => return null, - .codegen_failure, - .codegen_failure_retryable, - .repeat_dependency_failure, - .repeat_sema_failure, - .repeat_in_progress, - .complete, - => return &self.typed_value.most_recent, - } - } - }; - - /// Fn struct memory is owned by the Decl's TypedValue.Managed arena allocator. - pub const Fn = struct { - /// This memory owned by the Decl's TypedValue.Managed arena allocator. - fn_type: Type, - analysis: union(enum) { - /// The value is the source instruction. - queued: *text.Inst.Fn, - in_progress: *Analysis, - /// There will be a corresponding ErrorMsg in Module.failed_decls - sema_failure, - /// This Fn might be OK but it depends on another Decl which did not successfully complete - /// semantic analysis. - dependency_failure, - success: Body, - }, - - /// This memory is temporary and points to stack memory for the duration - /// of Fn analysis. - pub const Analysis = struct { - inner_block: Scope.Block, - /// TODO Performance optimization idea: instead of this inst_table, - /// use a field in the text.Inst instead to track corresponding instructions - inst_table: std.AutoHashMap(*text.Inst, *Inst), - needed_inst_capacity: usize, - }; - }; - - pub const Scope = struct { - tag: Tag, - - pub fn cast(base: *Scope, comptime T: type) ?*T { - if (base.tag != T.base_tag) - return null; - - return @fieldParentPtr(T, "base", base); - } - - /// Asserts the scope has a parent which is a DeclAnalysis and - /// returns the arena Allocator. - pub fn arena(self: *Scope) *Allocator { - switch (self.tag) { - .block => return self.cast(Block).?.arena, - .decl => return &self.cast(DeclAnalysis).?.arena.allocator, - .zir_module => return &self.cast(ZIRModule).?.contents.module.arena.allocator, - } - } - - /// Asserts the scope has a parent which is a DeclAnalysis and - /// returns the Decl. - pub fn decl(self: *Scope) *Decl { - switch (self.tag) { - .block => return self.cast(Block).?.decl, - .decl => return self.cast(DeclAnalysis).?.decl, - .zir_module => unreachable, - } - } - - /// Asserts the scope has a parent which is a ZIRModule and - /// returns it. 
- pub fn namespace(self: *Scope) *ZIRModule { - switch (self.tag) { - .block => return self.cast(Block).?.decl.scope, - .decl => return self.cast(DeclAnalysis).?.decl.scope, - .zir_module => return self.cast(ZIRModule).?, - } - } - - pub fn dumpInst(self: *Scope, inst: *Inst) void { - const zir_module = self.namespace(); - const loc = std.zig.findLineColumn(zir_module.source.bytes, inst.src); - std.debug.warn("{}:{}:{}: {}: ty={}\n", .{ - zir_module.sub_file_path, - loc.line + 1, - loc.column + 1, - @tagName(inst.tag), - inst.ty, - }); - } - - pub const Tag = enum { - zir_module, - block, - decl, - }; - - pub const ZIRModule = struct { - pub const base_tag: Tag = .zir_module; - base: Scope = Scope{ .tag = base_tag }, - /// Relative to the owning package's root_src_dir. - /// Reference to external memory, not owned by ZIRModule. - sub_file_path: []const u8, - source: union { - unloaded: void, - bytes: [:0]const u8, - }, - contents: union { - not_available: void, - module: *text.Module, - }, - status: enum { - never_loaded, - unloaded_success, - unloaded_parse_failure, - unloaded_sema_failure, - loaded_parse_failure, - loaded_sema_failure, - loaded_success, - }, - - pub fn unload(self: *ZIRModule, allocator: *Allocator) void { - switch (self.status) { - .never_loaded, - .unloaded_parse_failure, - .unloaded_sema_failure, - .unloaded_success, - => {}, - - .loaded_success => { - allocator.free(self.source.bytes); - self.contents.module.deinit(allocator); - allocator.destroy(self.contents.module); - self.status = .unloaded_success; - }, - .loaded_sema_failure => { - allocator.free(self.source.bytes); - self.contents.module.deinit(allocator); - allocator.destroy(self.contents.module); - self.status = .unloaded_sema_failure; - }, - .loaded_parse_failure => { - allocator.free(self.source.bytes); - self.status = .unloaded_parse_failure; - }, - } - } - - pub fn deinit(self: *ZIRModule, allocator: *Allocator) void { - self.unload(allocator); - self.* = undefined; - } - - pub fn dumpSrc(self: *ZIRModule, src: usize) void { - const loc = std.zig.findLineColumn(self.source.bytes, src); - std.debug.warn("{}:{}:{}\n", .{ self.sub_file_path, loc.line + 1, loc.column + 1 }); - } - }; - - /// This is a temporary structure, references to it are valid only - /// during semantic analysis of the block. - pub const Block = struct { - pub const base_tag: Tag = .block; - base: Scope = Scope{ .tag = base_tag }, - func: *Fn, - decl: *Decl, - instructions: ArrayListUnmanaged(*Inst), - /// Points to the arena allocator of DeclAnalysis - arena: *Allocator, - }; - - /// This is a temporary structure, references to it are valid only - /// during semantic analysis of the decl. 
- pub const DeclAnalysis = struct { - pub const base_tag: Tag = .decl; - base: Scope = Scope{ .tag = base_tag }, - decl: *Decl, - arena: std.heap.ArenaAllocator, - }; - }; - - pub const Body = struct { - instructions: []*Inst, - }; - - pub const AllErrors = struct { - arena: std.heap.ArenaAllocator.State, - list: []const Message, - - pub const Message = struct { - src_path: []const u8, - line: usize, - column: usize, - byte_offset: usize, - msg: []const u8, - }; - - pub fn deinit(self: *AllErrors, allocator: *Allocator) void { - self.arena.promote(allocator).deinit(); - } - - fn add( - arena: *std.heap.ArenaAllocator, - errors: *std.ArrayList(Message), - sub_file_path: []const u8, - source: []const u8, - simple_err_msg: ErrorMsg, - ) !void { - const loc = std.zig.findLineColumn(source, simple_err_msg.byte_offset); - try errors.append(.{ - .src_path = try arena.allocator.dupe(u8, sub_file_path), - .msg = try arena.allocator.dupe(u8, simple_err_msg.msg), - .byte_offset = simple_err_msg.byte_offset, - .line = loc.line, - .column = loc.column, - }); - } - }; - - pub fn deinit(self: *Module) void { - const allocator = self.allocator; - self.work_queue.deinit(); - { - var it = self.decl_table.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.decl_table.deinit(); - } - { - var it = self.failed_decls.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.failed_decls.deinit(); - } - { - var it = self.failed_files.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.failed_files.deinit(); - } - { - var it = self.failed_exports.iterator(); - while (it.next()) |kv| { - kv.value.destroy(allocator); - } - self.failed_exports.deinit(); - } - { - var it = self.decl_exports.iterator(); - while (it.next()) |kv| { - const export_list = kv.value; - allocator.free(export_list); - } - self.decl_exports.deinit(); - } - { - var it = self.export_owners.iterator(); - while (it.next()) |kv| { - const export_list = kv.value; - for (export_list) |exp| { - allocator.destroy(exp); - } - allocator.free(export_list); - } - self.export_owners.deinit(); - } - self.root_pkg.destroy(); - { - self.root_scope.deinit(allocator); - allocator.destroy(self.root_scope); - } - self.* = undefined; - } - - pub fn target(self: Module) std.Target { - return self.bin_file.options.target; - } - - /// Detect changes to source files, perform semantic analysis, and update the output files. - pub fn update(self: *Module) !void { - // TODO Use the cache hash file system to detect which source files changed. - // Here we simulate a full cache miss. - // Analyze the root source file now. - self.analyzeRoot(self.root_scope) catch |err| switch (err) { - error.AnalysisFail => { - assert(self.totalErrorCount() != 0); - }, - else => |e| return e, - }; - - try self.performAllTheWork(); - - // Unload all the source files from memory. 
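
An aside on the pattern `AllErrors` uses above: it stores only a `std.heap.ArenaAllocator.State`, so the parent allocator must be handed back in to `promote` the state before anything can be freed. A minimal sketch of that round trip, using made-up names (`Report`, `makeReport`) rather than anything from this patch; the unload just below works in the same spirit, since values that must survive have already been copied into an arena:

```zig
const std = @import("std");

const Report = struct {
    /// Only the state is stored; the parent allocator lives elsewhere.
    arena: std.heap.ArenaAllocator.State,
    text: []const u8,

    fn deinit(self: *Report, allocator: *std.mem.Allocator) void {
        // Rehydrate the full ArenaAllocator, then free everything at once.
        self.arena.promote(allocator).deinit();
    }
};

fn makeReport(allocator: *std.mem.Allocator) !Report {
    var arena = std.heap.ArenaAllocator.init(allocator);
    errdefer arena.deinit();
    const text = try arena.allocator.dupe(u8, "no errors");
    return Report{ .arena = arena.state, .text = text };
}
```
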
- self.root_scope.unload(self.allocator); - - try self.bin_file.flush(); - self.link_error_flags = self.bin_file.error_flags; - } - - pub fn totalErrorCount(self: *Module) usize { - return self.failed_decls.size + - self.failed_files.size + - self.failed_exports.size + - @boolToInt(self.link_error_flags.no_entry_point_found); - } - - pub fn getAllErrorsAlloc(self: *Module) !AllErrors { - var arena = std.heap.ArenaAllocator.init(self.allocator); - errdefer arena.deinit(); - - var errors = std.ArrayList(AllErrors.Message).init(self.allocator); - defer errors.deinit(); - - { - var it = self.failed_files.iterator(); - while (it.next()) |kv| { - const scope = kv.key; - const err_msg = kv.value; - const source = scope.source.bytes; - try AllErrors.add(&arena, &errors, scope.sub_file_path, source, err_msg.*); - } - } - { - var it = self.failed_decls.iterator(); - while (it.next()) |kv| { - const decl = kv.key; - const err_msg = kv.value; - const source = decl.scope.source.bytes; - try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); - } - } - { - var it = self.failed_exports.iterator(); - while (it.next()) |kv| { - const decl = kv.key.owner_decl; - const err_msg = kv.value; - const source = decl.scope.source.bytes; - try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); - } - } - - if (self.link_error_flags.no_entry_point_found) { - try errors.append(.{ - .src_path = self.root_pkg.root_src_path, - .line = 0, - .column = 0, - .byte_offset = 0, - .msg = try std.fmt.allocPrint(&arena.allocator, "no entry point found", .{}), - }); - } - - assert(errors.items.len == self.totalErrorCount()); - - return AllErrors{ - .arena = arena.state, - .list = try arena.allocator.dupe(AllErrors.Message, errors.items), - }; - } - - const InnerError = error{ OutOfMemory, AnalysisFail }; - - pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void { - while (self.work_queue.readItem()) |work_item| switch (work_item) { - .codegen_decl => |decl| switch (decl.analysis) { - .initial_in_progress, - .repeat_in_progress, - => unreachable, - - .initial_sema_failure, - .repeat_sema_failure, - .codegen_failure, - .initial_dependency_failure, - .repeat_dependency_failure, - => continue, - - .complete, .codegen_failure_retryable => { - if (decl.typed_value.most_recent.typed_value.val.cast(Value.Payload.Function)) |payload| { - switch (payload.func.analysis) { - .queued => self.analyzeFnBody(decl, payload.func) catch |err| switch (err) { - error.AnalysisFail => { - if (payload.func.analysis == .queued) { - payload.func.analysis = .dependency_failure; - } - continue; - }, - else => |e| return e, - }, - .in_progress => unreachable, - .sema_failure, .dependency_failure => continue, - .success => {}, - } - } - - assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()); - - self.bin_file.updateDecl(self, decl) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.AnalysisFail => { - decl.analysis = .repeat_dependency_failure; - }, - else => { - try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); - self.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create( - self.allocator, - decl.src, - "unable to codegen: {}", - .{@errorName(err)}, - )); - decl.analysis = .codegen_failure_retryable; - }, - }; - }, - }, - }; - } - - fn getTextModule(self: *Module, root_scope: *Scope.ZIRModule) !*text.Module { - switch (root_scope.status) { - .never_loaded, .unloaded_success => { - try 
self.failed_files.ensureCapacity(self.failed_files.size + 1); - - var keep_source = false; - const source = try self.root_pkg.root_src_dir.readFileAllocOptions( - self.allocator, - self.root_pkg.root_src_path, - std.math.maxInt(u32), - 1, - 0, - ); - defer if (!keep_source) self.allocator.free(source); - - var keep_zir_module = false; - const zir_module = try self.allocator.create(text.Module); - defer if (!keep_zir_module) self.allocator.destroy(zir_module); - - zir_module.* = try text.parse(self.allocator, source); - defer if (!keep_zir_module) zir_module.deinit(self.allocator); - - if (zir_module.error_msg) |src_err_msg| { - self.failed_files.putAssumeCapacityNoClobber( - root_scope, - try ErrorMsg.create(self.allocator, src_err_msg.byte_offset, "{}", .{src_err_msg.msg}), - ); - root_scope.status = .loaded_parse_failure; - root_scope.source = .{ .bytes = source }; - keep_source = true; - return error.AnalysisFail; - } - - root_scope.status = .loaded_success; - root_scope.source = .{ .bytes = source }; - keep_source = true; - root_scope.contents = .{ .module = zir_module }; - keep_zir_module = true; - - return zir_module; - }, - - .unloaded_parse_failure, - .unloaded_sema_failure, - .loaded_parse_failure, - .loaded_sema_failure, - => return error.AnalysisFail, - .loaded_success => return root_scope.contents.module, - } - } - - fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void { - // TODO use the cache to identify, from the modified source files, the decls which have - // changed based on the span of memory that represents the decl in the re-parsed source file. - // Use the cached dependency graph to recursively determine the set of decls which need - // regeneration. - // Here we simulate adding a source file which was previously not part of the compilation, - // which means scanning the decls looking for exports. - // TODO also identify decls that need to be deleted. - switch (root_scope.status) { - .never_loaded => { - const src_module = try self.getTextModule(root_scope); - - // Here we ensure enough queue capacity to store all the decls, so that later we can use - // appendAssumeCapacity. - try self.work_queue.ensureUnusedCapacity(src_module.decls.len); - - for (src_module.decls) |decl| { - if (decl.cast(text.Inst.Export)) |export_inst| { - _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); - } - } - }, - - .unloaded_parse_failure, - .unloaded_sema_failure, - .loaded_parse_failure, - .loaded_sema_failure, - .loaded_success, - .unloaded_success, - => { - const src_module = try self.getTextModule(root_scope); - - // Look for changed decls. 
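
The loop that follows implements the change detection: each re-parsed decl's contents are hashed and compared against the hash recorded on the existing `Decl`. Condensed into a standalone sketch; `Hash` and `hashContents` are stand-ins for `Decl.Hash` and `Decl.hashSimpleName`, and `std.crypto.Blake3` is an assumption made for illustration, not necessarily the hash this patch uses:

```zig
const std = @import("std");

const Hash = [32]u8;

// Stand-in for Decl.hashSimpleName (assumed Blake3-based).
fn hashContents(bytes: []const u8) Hash {
    var out: Hash = undefined;
    std.crypto.Blake3.hash(bytes, &out);
    return out;
}

// A decl is re-resolved only when its recorded hash no longer matches
// the hash of its span in the re-parsed source.
fn declChanged(recorded: Hash, new_contents: []const u8) bool {
    const fresh = hashContents(new_contents);
    return !std.mem.eql(u8, &recorded, &fresh);
}
```
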
- for (src_module.decls) |src_decl| { - const name_hash = Decl.hashSimpleName(src_decl.name); - if (self.decl_table.get(name_hash)) |kv| { - const decl = kv.value; - const new_contents_hash = Decl.hashSimpleName(src_decl.contents); - if (!mem.eql(u8, &new_contents_hash, &decl.contents_hash)) { - // TODO recursive dependency management - std.debug.warn("noticed that '{}' changed\n", .{src_decl.name}); - self.decl_table.removeAssertDiscard(name_hash); - const saved_link = decl.link; - decl.destroy(self.allocator); - if (self.export_owners.getValue(decl)) |exports| { - @panic("TODO handle updating a decl that does an export"); - } - const new_decl = self.resolveDecl( - &root_scope.base, - src_decl, - saved_link, - ) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.AnalysisFail => continue, - }; - if (self.decl_exports.remove(decl)) |entry| { - self.decl_exports.putAssumeCapacityNoClobber(new_decl, entry.value); - } - } - } else if (src_decl.cast(text.Inst.Export)) |export_inst| { - _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); - } - } - }, - } - } - - fn analyzeFnBody(self: *Module, decl: *Decl, func: *Fn) !void { - // Use the Decl's arena for function memory. - var arena = decl.typed_value.most_recent.arena.?.promote(self.allocator); - defer decl.typed_value.most_recent.arena.?.* = arena.state; - var analysis: Fn.Analysis = .{ - .inner_block = .{ - .func = func, - .decl = decl, - .instructions = .{}, - .arena = &arena.allocator, - }, - .needed_inst_capacity = 0, - .inst_table = std.AutoHashMap(*text.Inst, *Inst).init(self.allocator), - }; - defer analysis.inner_block.instructions.deinit(self.allocator); - defer analysis.inst_table.deinit(); - - const fn_inst = func.analysis.queued; - func.analysis = .{ .in_progress = &analysis }; - - try self.analyzeBody(&analysis.inner_block.base, fn_inst.positionals.body); - - func.analysis = .{ - .success = .{ - .instructions = try arena.allocator.dupe(*Inst, analysis.inner_block.instructions.items), - }, - }; - } - - fn resolveDecl( - self: *Module, - scope: *Scope, - old_inst: *text.Inst, - bin_file_link: link.ElfFile.Decl, - ) InnerError!*Decl { - const hash = Decl.hashSimpleName(old_inst.name); - if (self.decl_table.get(hash)) |kv| { - return kv.value; - } else { - const new_decl = blk: { - try self.decl_table.ensureCapacity(self.decl_table.size + 1); - const new_decl = try self.allocator.create(Decl); - errdefer self.allocator.destroy(new_decl); - const name = try mem.dupeZ(self.allocator, u8, old_inst.name); - errdefer self.allocator.free(name); - new_decl.* = .{ - .name = name, - .scope = scope.namespace(), - .src = old_inst.src, - .typed_value = .{ .never_succeeded = {} }, - .analysis = .initial_in_progress, - .contents_hash = Decl.hashSimpleName(old_inst.contents), - .link = bin_file_link, - }; - self.decl_table.putAssumeCapacityNoClobber(hash, new_decl); - break :blk new_decl; - }; - - var decl_scope: Scope.DeclAnalysis = .{ - .decl = new_decl, - .arena = std.heap.ArenaAllocator.init(self.allocator), - }; - errdefer decl_scope.arena.deinit(); - - const typed_value = self.analyzeInstConst(&decl_scope.base, old_inst) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.AnalysisFail => { - switch (new_decl.analysis) { - .initial_in_progress => new_decl.analysis = .initial_dependency_failure, - .repeat_in_progress => new_decl.analysis = .repeat_dependency_failure, - else => {}, - } - return error.AnalysisFail; - }, - }; - const arena_state = 
try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); - - const has_codegen_bits = typed_value.ty.hasCodeGenBits(); - if (has_codegen_bits) { - // We don't fully codegen the decl until later, but we do need to reserve a global - // offset table index for it. This allows us to codegen decls out of dependency order, - // increasing how many computations can be done in parallel. - try self.bin_file.allocateDeclIndexes(new_decl); - } - - arena_state.* = decl_scope.arena.state; - - new_decl.typed_value = .{ - .most_recent = .{ - .typed_value = typed_value, - .arena = arena_state, - }, - }; - new_decl.analysis = .complete; - if (has_codegen_bits) { - // We ensureCapacity when scanning for decls. - self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); - } - return new_decl; - } - } - - fn resolveCompleteDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl { - const decl = try self.resolveDecl(scope, old_inst, link.ElfFile.Decl.empty); - switch (decl.analysis) { - .initial_in_progress => unreachable, - .repeat_in_progress => unreachable, - .initial_dependency_failure, - .repeat_dependency_failure, - .initial_sema_failure, - .repeat_sema_failure, - .codegen_failure, - .codegen_failure_retryable, - => return error.AnalysisFail, - - .complete => return decl, - } - } - - fn resolveInst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Inst { - if (scope.cast(Scope.Block)) |block| { - if (block.func.analysis.in_progress.inst_table.get(old_inst)) |kv| { - return kv.value; - } - } - - const decl = try self.resolveCompleteDecl(scope, old_inst); - const decl_ref = try self.analyzeDeclRef(scope, old_inst.src, decl); - return self.analyzeDeref(scope, old_inst.src, decl_ref, old_inst.src); - } - - fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { - return scope.cast(Scope.Block) orelse - return self.fail(scope, src, "instruction illegal outside function body", .{}); - } - - fn resolveInstConst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!TypedValue { - const new_inst = try self.resolveInst(scope, old_inst); - const val = try self.resolveConstValue(scope, new_inst); - return TypedValue{ - .ty = new_inst.ty, - .val = val, - }; - } - - fn resolveConstValue(self: *Module, scope: *Scope, base: *Inst) !Value { - return (try self.resolveDefinedValue(scope, base)) orelse - return self.fail(scope, base.src, "unable to resolve comptime value", .{}); - } - - fn resolveDefinedValue(self: *Module, scope: *Scope, base: *Inst) !?Value { - if (base.value()) |val| { - if (val.isUndef()) { - return self.fail(scope, base.src, "use of undefined value here causes undefined behavior", .{}); - } - return val; - } - return null; - } - - fn resolveConstString(self: *Module, scope: *Scope, old_inst: *text.Inst) ![]u8 { - const new_inst = try self.resolveInst(scope, old_inst); - const wanted_type = Type.initTag(.const_slice_u8); - const coerced_inst = try self.coerce(scope, wanted_type, new_inst); - const val = try self.resolveConstValue(scope, coerced_inst); - return val.toAllocatedBytes(scope.arena()); - } - - fn resolveType(self: *Module, scope: *Scope, old_inst: *text.Inst) !Type { - const new_inst = try self.resolveInst(scope, old_inst); - const wanted_type = Type.initTag(.@"type"); - const coerced_inst = try self.coerce(scope, wanted_type, new_inst); - const val = try self.resolveConstValue(scope, coerced_inst); - return val.toType(); - } - - fn analyzeExport(self: *Module, scope: *Scope, export_inst: 
*text.Inst.Export) InnerError!void { - try self.decl_exports.ensureCapacity(self.decl_exports.size + 1); - try self.export_owners.ensureCapacity(self.export_owners.size + 1); - const symbol_name = try self.resolveConstString(scope, export_inst.positionals.symbol_name); - const exported_decl = try self.resolveCompleteDecl(scope, export_inst.positionals.value); - const typed_value = exported_decl.typed_value.most_recent.typed_value; - switch (typed_value.ty.zigTypeTag()) { - .Fn => {}, - else => return self.fail( - scope, - export_inst.positionals.value.src, - "unable to export type '{}'", - .{typed_value.ty}, - ), - } - const new_export = try self.allocator.create(Export); - errdefer self.allocator.destroy(new_export); - - const owner_decl = scope.decl(); - - new_export.* = .{ - .options = .{ .name = symbol_name }, - .src = export_inst.base.src, - .link = .{}, - .owner_decl = owner_decl, - .status = .in_progress, - }; - - // Add to export_owners table. - const eo_gop = self.export_owners.getOrPut(owner_decl) catch unreachable; - if (!eo_gop.found_existing) { - eo_gop.kv.value = &[0]*Export{}; - } - eo_gop.kv.value = try self.allocator.realloc(eo_gop.kv.value, eo_gop.kv.value.len + 1); - eo_gop.kv.value[eo_gop.kv.value.len - 1] = new_export; - errdefer eo_gop.kv.value = self.allocator.shrink(eo_gop.kv.value, eo_gop.kv.value.len - 1); - - // Add to exported_decl table. - const de_gop = self.decl_exports.getOrPut(exported_decl) catch unreachable; - if (!de_gop.found_existing) { - de_gop.kv.value = &[0]*Export{}; - } - de_gop.kv.value = try self.allocator.realloc(de_gop.kv.value, de_gop.kv.value.len + 1); - de_gop.kv.value[de_gop.kv.value.len - 1] = new_export; - errdefer de_gop.kv.value = self.allocator.shrink(de_gop.kv.value, de_gop.kv.value.len - 1); - - self.bin_file.updateDeclExports(self, exported_decl, de_gop.kv.value) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => { - try self.failed_exports.ensureCapacity(self.failed_exports.size + 1); - self.failed_exports.putAssumeCapacityNoClobber(new_export, try ErrorMsg.create( - self.allocator, - export_inst.base.src, - "unable to export: {}", - .{@errorName(err)}, - )); - new_export.status = .failed_retryable; - }, - }; - } - - /// TODO should not need the cast on the last parameter at the callsites - fn addNewInstArgs( - self: *Module, - block: *Scope.Block, - src: usize, - ty: Type, - comptime T: type, - args: Inst.Args(T), - ) !*Inst { - const inst = try self.addNewInst(block, src, ty, T); - inst.args = args; - return &inst.base; - } - - fn addNewInst(self: *Module, block: *Scope.Block, src: usize, ty: Type, comptime T: type) !*T { - const inst = try block.arena.create(T); - inst.* = .{ - .base = .{ - .tag = T.base_tag, - .ty = ty, - .src = src, - }, - .args = undefined, - }; - try block.instructions.append(self.allocator, &inst.base); - return inst; - } - - fn constInst(self: *Module, scope: *Scope, src: usize, typed_value: TypedValue) !*Inst { - const const_inst = try scope.arena().create(Inst.Constant); - const_inst.* = .{ - .base = .{ - .tag = Inst.Constant.base_tag, - .ty = typed_value.ty, - .src = src, - }, - .val = typed_value.val, - }; - return &const_inst.base; - } - - fn constStr(self: *Module, scope: *Scope, src: usize, str: []const u8) !*Inst { - const ty_payload = try scope.arena().create(Type.Payload.Array_u8_Sentinel0); - ty_payload.* = .{ .len = str.len }; - - const bytes_payload = try scope.arena().create(Value.Payload.Bytes); - bytes_payload.* = .{ .data = str }; - - return 
self.constInst(scope, src, .{ - .ty = Type.initPayload(&ty_payload.base), - .val = Value.initPayload(&bytes_payload.base), - }); - } - - fn constType(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { - return self.constInst(scope, src, .{ - .ty = Type.initTag(.type), - .val = try ty.toValue(scope.arena()), - }); - } - - fn constVoid(self: *Module, scope: *Scope, src: usize) !*Inst { - return self.constInst(scope, src, .{ - .ty = Type.initTag(.void), - .val = Value.initTag(.the_one_possible_value), - }); - } - - fn constUndef(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { - return self.constInst(scope, src, .{ - .ty = ty, - .val = Value.initTag(.undef), - }); - } - - fn constBool(self: *Module, scope: *Scope, src: usize, v: bool) !*Inst { - return self.constInst(scope, src, .{ - .ty = Type.initTag(.bool), - .val = ([2]Value{ Value.initTag(.bool_false), Value.initTag(.bool_true) })[@boolToInt(v)], - }); - } - - fn constIntUnsigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: u64) !*Inst { - const int_payload = try scope.arena().create(Value.Payload.Int_u64); - int_payload.* = .{ .int = int }; - - return self.constInst(scope, src, .{ - .ty = ty, - .val = Value.initPayload(&int_payload.base), - }); - } - - fn constIntSigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: i64) !*Inst { - const int_payload = try scope.arena().create(Value.Payload.Int_i64); - int_payload.* = .{ .int = int }; - - return self.constInst(scope, src, .{ - .ty = ty, - .val = Value.initPayload(&int_payload.base), - }); - } - - fn constIntBig(self: *Module, scope: *Scope, src: usize, ty: Type, big_int: BigIntConst) !*Inst { - const val_payload = if (big_int.positive) blk: { - if (big_int.to(u64)) |x| { - return self.constIntUnsigned(scope, src, ty, x); - } else |err| switch (err) { - error.NegativeIntoUnsigned => unreachable, - error.TargetTooSmall => {}, // handled below - } - const big_int_payload = try scope.arena().create(Value.Payload.IntBigPositive); - big_int_payload.* = .{ .limbs = big_int.limbs }; - break :blk &big_int_payload.base; - } else blk: { - if (big_int.to(i64)) |x| { - return self.constIntSigned(scope, src, ty, x); - } else |err| switch (err) { - error.NegativeIntoUnsigned => unreachable, - error.TargetTooSmall => {}, // handled below - } - const big_int_payload = try scope.arena().create(Value.Payload.IntBigNegative); - big_int_payload.* = .{ .limbs = big_int.limbs }; - break :blk &big_int_payload.base; - }; - - return self.constInst(scope, src, .{ - .ty = ty, - .val = Value.initPayload(val_payload), - }); - } - - fn analyzeInstConst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!TypedValue { - const new_inst = try self.analyzeInst(scope, old_inst); - return TypedValue{ - .ty = new_inst.ty, - .val = try self.resolveConstValue(scope, new_inst), - }; - } - - fn analyzeInst(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Inst { - switch (old_inst.tag) { - .breakpoint => return self.analyzeInstBreakpoint(scope, old_inst.cast(text.Inst.Breakpoint).?), - .call => return self.analyzeInstCall(scope, old_inst.cast(text.Inst.Call).?), - .declref => return self.analyzeInstDeclRef(scope, old_inst.cast(text.Inst.DeclRef).?), - .str => { - const bytes = old_inst.cast(text.Inst.Str).?.positionals.bytes; - // The bytes references memory inside the ZIR text module, which can get deallocated - // after semantic analysis is complete. We need the memory to be in the Decl's arena. 
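
The `dupe` on the next line applies the general ownership rule of this design: bytes that must outlive the ZIR text buffer get copied into the decl's arena before that buffer can be unloaded. The same move in isolation, as a sketch with a hypothetical `internIntoArena` helper:

```zig
const std = @import("std");

// Copy transient bytes into an arena so they survive after the source
// buffer they came from is freed.
fn internIntoArena(arena: *std.mem.Allocator, transient: []const u8) ![]u8 {
    return arena.dupe(u8, transient);
}

test "the copy outlives the original buffer" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    const transient = try std.testing.allocator.dupe(u8, "hello");
    const owned = try internIntoArena(&arena.allocator, transient);
    std.testing.allocator.free(transient); // the original goes away
    std.testing.expect(std.mem.eql(u8, owned, "hello"));
}
```
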
- const arena_bytes = try scope.arena().dupe(u8, bytes); - return self.constStr(scope, old_inst.src, arena_bytes); - }, - .int => { - const big_int = old_inst.cast(text.Inst.Int).?.positionals.int; - return self.constIntBig(scope, old_inst.src, Type.initTag(.comptime_int), big_int); - }, - .ptrtoint => return self.analyzeInstPtrToInt(scope, old_inst.cast(text.Inst.PtrToInt).?), - .fieldptr => return self.analyzeInstFieldPtr(scope, old_inst.cast(text.Inst.FieldPtr).?), - .deref => return self.analyzeInstDeref(scope, old_inst.cast(text.Inst.Deref).?), - .as => return self.analyzeInstAs(scope, old_inst.cast(text.Inst.As).?), - .@"asm" => return self.analyzeInstAsm(scope, old_inst.cast(text.Inst.Asm).?), - .@"unreachable" => return self.analyzeInstUnreachable(scope, old_inst.cast(text.Inst.Unreachable).?), - .@"return" => return self.analyzeInstRet(scope, old_inst.cast(text.Inst.Return).?), - .@"fn" => return self.analyzeInstFn(scope, old_inst.cast(text.Inst.Fn).?), - .@"export" => { - try self.analyzeExport(scope, old_inst.cast(text.Inst.Export).?); - return self.constVoid(scope, old_inst.src); - }, - .primitive => return self.analyzeInstPrimitive(scope, old_inst.cast(text.Inst.Primitive).?), - .ref => return self.analyzeInstRef(scope, old_inst.cast(text.Inst.Ref).?), - .fntype => return self.analyzeInstFnType(scope, old_inst.cast(text.Inst.FnType).?), - .intcast => return self.analyzeInstIntCast(scope, old_inst.cast(text.Inst.IntCast).?), - .bitcast => return self.analyzeInstBitCast(scope, old_inst.cast(text.Inst.BitCast).?), - .elemptr => return self.analyzeInstElemPtr(scope, old_inst.cast(text.Inst.ElemPtr).?), - .add => return self.analyzeInstAdd(scope, old_inst.cast(text.Inst.Add).?), - .cmp => return self.analyzeInstCmp(scope, old_inst.cast(text.Inst.Cmp).?), - .condbr => return self.analyzeInstCondBr(scope, old_inst.cast(text.Inst.CondBr).?), - .isnull => return self.analyzeInstIsNull(scope, old_inst.cast(text.Inst.IsNull).?), - .isnonnull => return self.analyzeInstIsNonNull(scope, old_inst.cast(text.Inst.IsNonNull).?), - } - } - - fn analyzeInstBreakpoint(self: *Module, scope: *Scope, inst: *text.Inst.Breakpoint) InnerError!*Inst { - const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, Inst.Args(Inst.Breakpoint){}); - } - - fn analyzeInstRef(self: *Module, scope: *Scope, inst: *text.Inst.Ref) InnerError!*Inst { - const decl = try self.resolveCompleteDecl(scope, inst.positionals.operand); - return self.analyzeDeclRef(scope, inst.base.src, decl); - } - - fn analyzeInstDeclRef(self: *Module, scope: *Scope, inst: *text.Inst.DeclRef) InnerError!*Inst { - const decl_name = try self.resolveConstString(scope, inst.positionals.name); - // This will need to get more fleshed out when there are proper structs & namespaces. 
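
The lines below resolve the name with a linear scan over the ZIR module's top-level decls, producing the "use of undeclared identifier" error on a miss. The shape of that lookup in isolation, with `NamedDecl` standing in for `text.Inst`:

```zig
const std = @import("std");

const NamedDecl = struct { name: []const u8 };

fn findDecl(decls: []const NamedDecl, name: []const u8) ?*const NamedDecl {
    for (decls) |*decl| {
        if (std.mem.eql(u8, decl.name, name)) return decl;
    }
    return null; // caller reports "use of undeclared identifier"
}

test "declref resolves by name" {
    const decls = [_]NamedDecl{ .{ .name = "entry" }, .{ .name = "msg" } };
    std.testing.expect(findDecl(&decls, "msg") != null);
    std.testing.expect(findDecl(&decls, "missing") == null);
}
```
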
- const zir_module = scope.namespace(); - for (zir_module.contents.module.decls) |src_decl| { - if (mem.eql(u8, src_decl.name, decl_name)) { - const decl = try self.resolveCompleteDecl(scope, src_decl); - return self.analyzeDeclRef(scope, inst.base.src, decl); - } - } - return self.fail(scope, inst.positionals.name.src, "use of undeclared identifier '{}'", .{decl_name}); - } - - fn analyzeDeclRef(self: *Module, scope: *Scope, src: usize, decl: *Decl) InnerError!*Inst { - const decl_tv = try decl.typedValue(); - const ty_payload = try scope.arena().create(Type.Payload.SingleConstPointer); - ty_payload.* = .{ .pointee_type = decl_tv.ty }; - const val_payload = try scope.arena().create(Value.Payload.DeclRef); - val_payload.* = .{ .decl = decl }; - return self.constInst(scope, src, .{ - .ty = Type.initPayload(&ty_payload.base), - .val = Value.initPayload(&val_payload.base), - }); - } - - fn analyzeInstCall(self: *Module, scope: *Scope, inst: *text.Inst.Call) InnerError!*Inst { - const func = try self.resolveInst(scope, inst.positionals.func); - if (func.ty.zigTypeTag() != .Fn) - return self.fail(scope, inst.positionals.func.src, "type '{}' not a function", .{func.ty}); - - const cc = func.ty.fnCallingConvention(); - if (cc == .Naked) { - // TODO add error note: declared here - return self.fail( - scope, - inst.positionals.func.src, - "unable to call function with naked calling convention", - .{}, - ); - } - const call_params_len = inst.positionals.args.len; - const fn_params_len = func.ty.fnParamLen(); - if (func.ty.fnIsVarArgs()) { - if (call_params_len < fn_params_len) { - // TODO add error note: declared here - return self.fail( - scope, - inst.positionals.func.src, - "expected at least {} arguments, found {}", - .{ fn_params_len, call_params_len }, - ); - } - return self.fail(scope, inst.base.src, "TODO implement support for calling var args functions", .{}); - } else if (fn_params_len != call_params_len) { - // TODO add error note: declared here - return self.fail( - scope, - inst.positionals.func.src, - "expected {} arguments, found {}", - .{ fn_params_len, call_params_len }, - ); - } - - if (inst.kw_args.modifier == .compile_time) { - return self.fail(scope, inst.base.src, "TODO implement comptime function calls", .{}); - } - if (inst.kw_args.modifier != .auto) { - return self.fail(scope, inst.base.src, "TODO implement call with modifier {}", .{inst.kw_args.modifier}); - } - - // TODO handle function calls of generic functions - - const fn_param_types = try self.allocator.alloc(Type, fn_params_len); - defer self.allocator.free(fn_param_types); - func.ty.fnParamTypes(fn_param_types); - - const casted_args = try scope.arena().alloc(*Inst, fn_params_len); - for (inst.positionals.args) |src_arg, i| { - const uncasted_arg = try self.resolveInst(scope, src_arg); - casted_args[i] = try self.coerce(scope, fn_param_types[i], uncasted_arg); - } - - const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Call, Inst.Args(Inst.Call){ - .func = func, - .args = casted_args, - }); - } - - fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *text.Inst.Fn) InnerError!*Inst { - const fn_type = try self.resolveType(scope, fn_inst.positionals.fn_type); - const new_func = try scope.arena().create(Fn); - new_func.* = .{ - .fn_type = fn_type, - .analysis = .{ .queued = fn_inst }, - }; - const fn_payload = try scope.arena().create(Value.Payload.Function); - fn_payload.* = .{ .func = new_func }; - return self.constInst(scope, 
fn_inst.base.src, .{ - .ty = fn_type, - .val = Value.initPayload(&fn_payload.base), - }); - } - - fn analyzeInstFnType(self: *Module, scope: *Scope, fntype: *text.Inst.FnType) InnerError!*Inst { - const return_type = try self.resolveType(scope, fntype.positionals.return_type); - - if (return_type.zigTypeTag() == .NoReturn and - fntype.positionals.param_types.len == 0 and - fntype.kw_args.cc == .Unspecified) - { - return self.constType(scope, fntype.base.src, Type.initTag(.fn_noreturn_no_args)); - } - - if (return_type.zigTypeTag() == .NoReturn and - fntype.positionals.param_types.len == 0 and - fntype.kw_args.cc == .Naked) - { - return self.constType(scope, fntype.base.src, Type.initTag(.fn_naked_noreturn_no_args)); - } - - if (return_type.zigTypeTag() == .Void and - fntype.positionals.param_types.len == 0 and - fntype.kw_args.cc == .C) - { - return self.constType(scope, fntype.base.src, Type.initTag(.fn_ccc_void_no_args)); - } - - return self.fail(scope, fntype.base.src, "TODO implement fntype instruction more", .{}); - } - - fn analyzeInstPrimitive(self: *Module, scope: *Scope, primitive: *text.Inst.Primitive) InnerError!*Inst { - return self.constType(scope, primitive.base.src, primitive.positionals.tag.toType()); - } - - fn analyzeInstAs(self: *Module, scope: *Scope, as: *text.Inst.As) InnerError!*Inst { - const dest_type = try self.resolveType(scope, as.positionals.dest_type); - const new_inst = try self.resolveInst(scope, as.positionals.value); - return self.coerce(scope, dest_type, new_inst); - } - - fn analyzeInstPtrToInt(self: *Module, scope: *Scope, ptrtoint: *text.Inst.PtrToInt) InnerError!*Inst { - const ptr = try self.resolveInst(scope, ptrtoint.positionals.ptr); - if (ptr.ty.zigTypeTag() != .Pointer) { - return self.fail(scope, ptrtoint.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}); - } - // TODO handle known-pointer-address - const b = try self.requireRuntimeBlock(scope, ptrtoint.base.src); - const ty = Type.initTag(.usize); - return self.addNewInstArgs(b, ptrtoint.base.src, ty, Inst.PtrToInt, Inst.Args(Inst.PtrToInt){ .ptr = ptr }); - } - - fn analyzeInstFieldPtr(self: *Module, scope: *Scope, fieldptr: *text.Inst.FieldPtr) InnerError!*Inst { - const object_ptr = try self.resolveInst(scope, fieldptr.positionals.object_ptr); - const field_name = try self.resolveConstString(scope, fieldptr.positionals.field_name); - - const elem_ty = switch (object_ptr.ty.zigTypeTag()) { - .Pointer => object_ptr.ty.elemType(), - else => return self.fail(scope, fieldptr.positionals.object_ptr.src, "expected pointer, found '{}'", .{object_ptr.ty}), - }; - switch (elem_ty.zigTypeTag()) { - .Array => { - if (mem.eql(u8, field_name, "len")) { - const len_payload = try scope.arena().create(Value.Payload.Int_u64); - len_payload.* = .{ .int = elem_ty.arrayLen() }; - - const ref_payload = try scope.arena().create(Value.Payload.RefVal); - ref_payload.* = .{ .val = Value.initPayload(&len_payload.base) }; - - return self.constInst(scope, fieldptr.base.src, .{ - .ty = Type.initTag(.single_const_pointer_to_comptime_int), - .val = Value.initPayload(&ref_payload.base), - }); - } else { - return self.fail( - scope, - fieldptr.positionals.field_name.src, - "no member named '{}' in '{}'", - .{ field_name, elem_ty }, - ); - } - }, - else => return self.fail(scope, fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}), - } - } - - fn analyzeInstIntCast(self: *Module, scope: *Scope, intcast: *text.Inst.IntCast) InnerError!*Inst { - const dest_type = try self.resolveType(scope, 
intcast.positionals.dest_type); - const new_inst = try self.resolveInst(scope, intcast.positionals.value); - - const dest_is_comptime_int = switch (dest_type.zigTypeTag()) { - .ComptimeInt => true, - .Int => false, - else => return self.fail( - scope, - intcast.positionals.dest_type.src, - "expected integer type, found '{}'", - .{ - dest_type, - }, - ), - }; - - switch (new_inst.ty.zigTypeTag()) { - .ComptimeInt, .Int => {}, - else => return self.fail( - scope, - intcast.positionals.value.src, - "expected integer type, found '{}'", - .{new_inst.ty}, - ), - } - - if (dest_is_comptime_int or new_inst.value() != null) { - return self.coerce(scope, dest_type, new_inst); - } - - return self.fail(scope, intcast.base.src, "TODO implement analyze widen or shorten int", .{}); - } - - fn analyzeInstBitCast(self: *Module, scope: *Scope, inst: *text.Inst.BitCast) InnerError!*Inst { - const dest_type = try self.resolveType(scope, inst.positionals.dest_type); - const operand = try self.resolveInst(scope, inst.positionals.operand); - return self.bitcast(scope, dest_type, operand); - } - - fn analyzeInstElemPtr(self: *Module, scope: *Scope, inst: *text.Inst.ElemPtr) InnerError!*Inst { - const array_ptr = try self.resolveInst(scope, inst.positionals.array_ptr); - const uncasted_index = try self.resolveInst(scope, inst.positionals.index); - const elem_index = try self.coerce(scope, Type.initTag(.usize), uncasted_index); - - if (array_ptr.ty.isSinglePointer() and array_ptr.ty.elemType().zigTypeTag() == .Array) { - if (array_ptr.value()) |array_ptr_val| { - if (elem_index.value()) |index_val| { - // Both array pointer and index are compile-time known. - const index_u64 = index_val.toUnsignedInt(); - // @intCast here because it would have been impossible to construct a value that - // required a larger index. - const elem_ptr = try array_ptr_val.elemPtr(scope.arena(), @intCast(usize, index_u64)); - - const type_payload = try scope.arena().create(Type.Payload.SingleConstPointer); - type_payload.* = .{ .pointee_type = array_ptr.ty.elemType().elemType() }; - - return self.constInst(scope, inst.base.src, .{ - .ty = Type.initPayload(&type_payload.base), - .val = elem_ptr, - }); - } - } - } - - return self.fail(scope, inst.base.src, "TODO implement more analyze elemptr", .{}); - } - - fn analyzeInstAdd(self: *Module, scope: *Scope, inst: *text.Inst.Add) InnerError!*Inst { - const lhs = try self.resolveInst(scope, inst.positionals.lhs); - const rhs = try self.resolveInst(scope, inst.positionals.rhs); - - if (lhs.ty.zigTypeTag() == .Int and rhs.ty.zigTypeTag() == .Int) { - if (lhs.value()) |lhs_val| { - if (rhs.value()) |rhs_val| { - // TODO is this a performance issue? maybe we should try the operation without - // resorting to BigInt first. 
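
The branch below carries out the addition with `std.math.big`: the operands are viewed as `BigIntConst`, a result buffer of `max(lhs.limbs.len, rhs.limbs.len) + 1` limbs is allocated, and the sum is formed in place. The same limb arithmetic as a standalone test, with made-up one-limb operands:

```zig
const std = @import("std");
const big = std.math.big;

test "big int add into preallocated limbs" {
    const lhs = big.int.Const{ .limbs = &[_]big.Limb{std.math.maxInt(big.Limb)}, .positive = true };
    const rhs = big.int.Const{ .limbs = &[_]big.Limb{1}, .positive = true };

    // max(1, 1) + 1 limbs is always enough to hold the sum.
    var result_limbs: [2]big.Limb = undefined;
    var result = big.int.Mutable{ .limbs = &result_limbs, .positive = undefined, .len = undefined };
    result.add(lhs, rhs);

    std.testing.expect(result.len == 2); // the carry spilled into a second limb
    std.testing.expect(result.positive);
}
```
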
- var lhs_space: Value.BigIntSpace = undefined; - var rhs_space: Value.BigIntSpace = undefined; - const lhs_bigint = lhs_val.toBigInt(&lhs_space); - const rhs_bigint = rhs_val.toBigInt(&rhs_space); - const limbs = try scope.arena().alloc( - std.math.big.Limb, - std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len) + 1, - ); - var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined }; - result_bigint.add(lhs_bigint, rhs_bigint); - const result_limbs = result_bigint.limbs[0..result_bigint.len]; - - if (!lhs.ty.eql(rhs.ty)) { - return self.fail(scope, inst.base.src, "TODO implement peer type resolution", .{}); - } - - const val_payload = if (result_bigint.positive) blk: { - const val_payload = try scope.arena().create(Value.Payload.IntBigPositive); - val_payload.* = .{ .limbs = result_limbs }; - break :blk &val_payload.base; - } else blk: { - const val_payload = try scope.arena().create(Value.Payload.IntBigNegative); - val_payload.* = .{ .limbs = result_limbs }; - break :blk &val_payload.base; - }; - - return self.constInst(scope, inst.base.src, .{ - .ty = lhs.ty, - .val = Value.initPayload(val_payload), - }); - } - } - } - - return self.fail(scope, inst.base.src, "TODO implement more analyze add", .{}); - } - - fn analyzeInstDeref(self: *Module, scope: *Scope, deref: *text.Inst.Deref) InnerError!*Inst { - const ptr = try self.resolveInst(scope, deref.positionals.ptr); - return self.analyzeDeref(scope, deref.base.src, ptr, deref.positionals.ptr.src); - } - - fn analyzeDeref(self: *Module, scope: *Scope, src: usize, ptr: *Inst, ptr_src: usize) InnerError!*Inst { - const elem_ty = switch (ptr.ty.zigTypeTag()) { - .Pointer => ptr.ty.elemType(), - else => return self.fail(scope, ptr_src, "expected pointer, found '{}'", .{ptr.ty}), - }; - if (ptr.value()) |val| { - return self.constInst(scope, src, .{ - .ty = elem_ty, - .val = try val.pointerDeref(scope.arena()), - }); - } - - return self.fail(scope, src, "TODO implement runtime deref", .{}); - } - - fn analyzeInstAsm(self: *Module, scope: *Scope, assembly: *text.Inst.Asm) InnerError!*Inst { - const return_type = try self.resolveType(scope, assembly.positionals.return_type); - const asm_source = try self.resolveConstString(scope, assembly.positionals.asm_source); - const output = if (assembly.kw_args.output) |o| try self.resolveConstString(scope, o) else null; - - const inputs = try scope.arena().alloc([]const u8, assembly.kw_args.inputs.len); - const clobbers = try scope.arena().alloc([]const u8, assembly.kw_args.clobbers.len); - const args = try scope.arena().alloc(*Inst, assembly.kw_args.args.len); - - for (inputs) |*elem, i| { - elem.* = try self.resolveConstString(scope, assembly.kw_args.inputs[i]); - } - for (clobbers) |*elem, i| { - elem.* = try self.resolveConstString(scope, assembly.kw_args.clobbers[i]); - } - for (args) |*elem, i| { - const arg = try self.resolveInst(scope, assembly.kw_args.args[i]); - elem.* = try self.coerce(scope, Type.initTag(.usize), arg); - } - - const b = try self.requireRuntimeBlock(scope, assembly.base.src); - return self.addNewInstArgs(b, assembly.base.src, return_type, Inst.Assembly, Inst.Args(Inst.Assembly){ - .asm_source = asm_source, - .is_volatile = assembly.kw_args.@"volatile", - .output = output, - .inputs = inputs, - .clobbers = clobbers, - .args = args, - }); - } - - fn analyzeInstCmp(self: *Module, scope: *Scope, inst: *text.Inst.Cmp) InnerError!*Inst { - const lhs = try self.resolveInst(scope, inst.positionals.lhs); - const rhs = try self.resolveInst(scope, 
inst.positionals.rhs); - const op = inst.positionals.op; - - const is_equality_cmp = switch (op) { - .eq, .neq => true, - else => false, - }; - const lhs_ty_tag = lhs.ty.zigTypeTag(); - const rhs_ty_tag = rhs.ty.zigTypeTag(); - if (is_equality_cmp and lhs_ty_tag == .Null and rhs_ty_tag == .Null) { - // null == null, null != null - return self.constBool(scope, inst.base.src, op == .eq); - } else if (is_equality_cmp and - ((lhs_ty_tag == .Null and rhs_ty_tag == .Optional) or - rhs_ty_tag == .Null and lhs_ty_tag == .Optional)) - { - // comparing null with optionals - const opt_operand = if (lhs_ty_tag == .Optional) lhs else rhs; - if (opt_operand.value()) |opt_val| { - const is_null = opt_val.isNull(); - return self.constBool(scope, inst.base.src, if (op == .eq) is_null else !is_null); - } - const b = try self.requireRuntimeBlock(scope, inst.base.src); - switch (op) { - .eq => return self.addNewInstArgs( - b, - inst.base.src, - Type.initTag(.bool), - Inst.IsNull, - Inst.Args(Inst.IsNull){ .operand = opt_operand }, - ), - .neq => return self.addNewInstArgs( - b, - inst.base.src, - Type.initTag(.bool), - Inst.IsNonNull, - Inst.Args(Inst.IsNonNull){ .operand = opt_operand }, - ), - else => unreachable, - } - } else if (is_equality_cmp and - ((lhs_ty_tag == .Null and rhs.ty.isCPtr()) or (rhs_ty_tag == .Null and lhs.ty.isCPtr()))) - { - return self.fail(scope, inst.base.src, "TODO implement C pointer cmp", .{}); - } else if (lhs_ty_tag == .Null or rhs_ty_tag == .Null) { - const non_null_type = if (lhs_ty_tag == .Null) rhs.ty else lhs.ty; - return self.fail(scope, inst.base.src, "comparison of '{}' with null", .{non_null_type}); - } else if (is_equality_cmp and - ((lhs_ty_tag == .EnumLiteral and rhs_ty_tag == .Union) or - (rhs_ty_tag == .EnumLiteral and lhs_ty_tag == .Union))) - { - return self.fail(scope, inst.base.src, "TODO implement equality comparison between a union's tag value and an enum literal", .{}); - } else if (lhs_ty_tag == .ErrorSet and rhs_ty_tag == .ErrorSet) { - if (!is_equality_cmp) { - return self.fail(scope, inst.base.src, "{} operator not allowed for errors", .{@tagName(op)}); - } - return self.fail(scope, inst.base.src, "TODO implement equality comparison between errors", .{}); - } else if (lhs.ty.isNumeric() and rhs.ty.isNumeric()) { - // This operation allows any combination of integer and float types, regardless of the - // signed-ness, comptime-ness, and bit-width. So peer type resolution is incorrect for - // numeric types. 
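
The `return` below hands off to `cmpNumeric` (defined further down), which instead picks a comparison type wide enough for both operand ranges: signed if either side is signed, with an unsigned operand gaining one extra bit when the result is signed. A condensed sketch of that bit-count rule for runtime-known integers, using a made-up `commonCmpBits` helper:

```zig
const std = @import("std");

const CmpType = struct { bits: u16, signed: bool };

fn commonCmpBits(lhs_bits: u16, lhs_signed: bool, rhs_bits: u16, rhs_signed: bool) CmpType {
    const dest_signed = lhs_signed or rhs_signed;
    // An unsigned operand needs one extra bit to fit in a signed type.
    const lhs_needed = lhs_bits + @boolToInt(!lhs_signed and dest_signed);
    const rhs_needed = rhs_bits + @boolToInt(!rhs_signed and dest_signed);
    return CmpType{ .bits = std.math.max(lhs_needed, rhs_needed), .signed = dest_signed };
}

test "u64 compared with i32 widens to i65" {
    const t = commonCmpBits(64, false, 32, true);
    std.testing.expect(t.bits == 65 and t.signed);
}
```
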
- return self.cmpNumeric(scope, inst.base.src, lhs, rhs, op); - } - return self.fail(scope, inst.base.src, "TODO implement more cmp analysis", .{}); - } - - fn analyzeInstIsNull(self: *Module, scope: *Scope, inst: *text.Inst.IsNull) InnerError!*Inst { - const operand = try self.resolveInst(scope, inst.positionals.operand); - return self.analyzeIsNull(scope, inst.base.src, operand, true); - } - - fn analyzeInstIsNonNull(self: *Module, scope: *Scope, inst: *text.Inst.IsNonNull) InnerError!*Inst { - const operand = try self.resolveInst(scope, inst.positionals.operand); - return self.analyzeIsNull(scope, inst.base.src, operand, false); - } - - fn analyzeInstCondBr(self: *Module, scope: *Scope, inst: *text.Inst.CondBr) InnerError!*Inst { - const uncasted_cond = try self.resolveInst(scope, inst.positionals.condition); - const cond = try self.coerce(scope, Type.initTag(.bool), uncasted_cond); - - if (try self.resolveDefinedValue(scope, cond)) |cond_val| { - const body = if (cond_val.toBool()) &inst.positionals.true_body else &inst.positionals.false_body; - try self.analyzeBody(scope, body.*); - return self.constVoid(scope, inst.base.src); - } - - const parent_block = try self.requireRuntimeBlock(scope, inst.base.src); - - var true_block: Scope.Block = .{ - .func = parent_block.func, - .decl = parent_block.decl, - .instructions = .{}, - .arena = parent_block.arena, - }; - defer true_block.instructions.deinit(self.allocator); - try self.analyzeBody(&true_block.base, inst.positionals.true_body); - - var false_block: Scope.Block = .{ - .func = parent_block.func, - .decl = parent_block.decl, - .instructions = .{}, - .arena = parent_block.arena, - }; - defer false_block.instructions.deinit(self.allocator); - try self.analyzeBody(&false_block.base, inst.positionals.false_body); - - return self.addNewInstArgs(parent_block, inst.base.src, Type.initTag(.void), Inst.CondBr, Inst.Args(Inst.CondBr){ - .condition = cond, - .true_body = .{ .instructions = try scope.arena().dupe(*Inst, true_block.instructions.items) }, - .false_body = .{ .instructions = try scope.arena().dupe(*Inst, false_block.instructions.items) }, - }); - } - - fn wantSafety(self: *Module, scope: *Scope) bool { - return switch (self.optimize_mode) { - .Debug => true, - .ReleaseSafe => true, - .ReleaseFast => false, - .ReleaseSmall => false, - }; - } - - fn analyzeInstUnreachable(self: *Module, scope: *Scope, unreach: *text.Inst.Unreachable) InnerError!*Inst { - const b = try self.requireRuntimeBlock(scope, unreach.base.src); - if (self.wantSafety(scope)) { - // TODO Once we have a panic function to call, call it here instead of this. 
- _ = try self.addNewInstArgs(b, unreach.base.src, Type.initTag(.void), Inst.Breakpoint, {}); - } - return self.addNewInstArgs(b, unreach.base.src, Type.initTag(.noreturn), Inst.Unreach, {}); - } - - fn analyzeInstRet(self: *Module, scope: *Scope, inst: *text.Inst.Return) InnerError!*Inst { - const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.noreturn), Inst.Ret, {}); - } - - fn analyzeBody(self: *Module, scope: *Scope, body: text.Module.Body) !void { - if (scope.cast(Scope.Block)) |b| { - const analysis = b.func.analysis.in_progress; - analysis.needed_inst_capacity += body.instructions.len; - try analysis.inst_table.ensureCapacity(analysis.needed_inst_capacity); - for (body.instructions) |src_inst| { - const new_inst = try self.analyzeInst(scope, src_inst); - analysis.inst_table.putAssumeCapacityNoClobber(src_inst, new_inst); - } - } else { - for (body.instructions) |src_inst| { - _ = try self.analyzeInst(scope, src_inst); - } - } - } - - fn analyzeIsNull( - self: *Module, - scope: *Scope, - src: usize, - operand: *Inst, - invert_logic: bool, - ) InnerError!*Inst { - return self.fail(scope, src, "TODO implement analysis of isnull and isnotnull", .{}); - } - - /// Asserts that lhs and rhs types are both numeric. - fn cmpNumeric( - self: *Module, - scope: *Scope, - src: usize, - lhs: *Inst, - rhs: *Inst, - op: std.math.CompareOperator, - ) !*Inst { - assert(lhs.ty.isNumeric()); - assert(rhs.ty.isNumeric()); - - const lhs_ty_tag = lhs.ty.zigTypeTag(); - const rhs_ty_tag = rhs.ty.zigTypeTag(); - - if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) { - if (lhs.ty.arrayLen() != rhs.ty.arrayLen()) { - return self.fail(scope, src, "vector length mismatch: {} and {}", .{ - lhs.ty.arrayLen(), - rhs.ty.arrayLen(), - }); - } - return self.fail(scope, src, "TODO implement support for vectors in cmpNumeric", .{}); - } else if (lhs_ty_tag == .Vector or rhs_ty_tag == .Vector) { - return self.fail(scope, src, "mixed scalar and vector operands to comparison operator: '{}' and '{}'", .{ - lhs.ty, - rhs.ty, - }); - } - - if (lhs.value()) |lhs_val| { - if (rhs.value()) |rhs_val| { - return self.constBool(scope, src, Value.compare(lhs_val, op, rhs_val)); - } - } - - // TODO handle comparisons against lazy zero values - // Some values can be compared against zero without being runtime known or without forcing - // a full resolution of their value, for example `@sizeOf(@Frame(function))` is known to - // always be nonzero, and we benefit from not forcing the full evaluation and stack frame layout - // of this function if we don't need to. - - // It must be a runtime comparison. - const b = try self.requireRuntimeBlock(scope, src); - // For floats, emit a float comparison instruction. - const lhs_is_float = switch (lhs_ty_tag) { - .Float, .ComptimeFloat => true, - else => false, - }; - const rhs_is_float = switch (rhs_ty_tag) { - .Float, .ComptimeFloat => true, - else => false, - }; - if (lhs_is_float and rhs_is_float) { - // Implicit cast the smaller one to the larger one. 
- const dest_type = x: { - if (lhs_ty_tag == .ComptimeFloat) { - break :x rhs.ty; - } else if (rhs_ty_tag == .ComptimeFloat) { - break :x lhs.ty; - } - if (lhs.ty.floatBits(self.target()) >= rhs.ty.floatBits(self.target())) { - break :x lhs.ty; - } else { - break :x rhs.ty; - } - }; - const casted_lhs = try self.coerce(scope, dest_type, lhs); - const casted_rhs = try self.coerce(scope, dest_type, rhs); - return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){ - .lhs = casted_lhs, - .rhs = casted_rhs, - .op = op, - }); - } - // For mixed unsigned integer sizes, implicit cast both operands to the larger integer. - // For mixed signed and unsigned integers, implicit cast both operands to a signed - // integer with + 1 bit. - // For mixed floats and integers, extract the integer part from the float, cast that to - // a signed integer with mantissa bits + 1, and if there was any non-integral part of the float, - // add/subtract 1. - const lhs_is_signed = if (lhs.value()) |lhs_val| - lhs_val.compareWithZero(.lt) - else - (lhs.ty.isFloat() or lhs.ty.isSignedInt()); - const rhs_is_signed = if (rhs.value()) |rhs_val| - rhs_val.compareWithZero(.lt) - else - (rhs.ty.isFloat() or rhs.ty.isSignedInt()); - const dest_int_is_signed = lhs_is_signed or rhs_is_signed; - - var dest_float_type: ?Type = null; - - var lhs_bits: usize = undefined; - if (lhs.value()) |lhs_val| { - if (lhs_val.isUndef()) - return self.constUndef(scope, src, Type.initTag(.bool)); - const is_unsigned = if (lhs_is_float) x: { - var bigint_space: Value.BigIntSpace = undefined; - var bigint = try lhs_val.toBigInt(&bigint_space).toManaged(self.allocator); - defer bigint.deinit(); - const zcmp = lhs_val.orderAgainstZero(); - if (lhs_val.floatHasFraction()) { - switch (op) { - .eq => return self.constBool(scope, src, false), - .neq => return self.constBool(scope, src, true), - else => {}, - } - if (zcmp == .lt) { - try bigint.addScalar(bigint.toConst(), -1); - } else { - try bigint.addScalar(bigint.toConst(), 1); - } - } - lhs_bits = bigint.toConst().bitCountTwosComp(); - break :x (zcmp != .lt); - } else x: { - lhs_bits = lhs_val.intBitCountTwosComp(); - break :x (lhs_val.orderAgainstZero() != .lt); - }; - lhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); - } else if (lhs_is_float) { - dest_float_type = lhs.ty; - } else { - const int_info = lhs.ty.intInfo(self.target()); - lhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed); - } - - var rhs_bits: usize = undefined; - if (rhs.value()) |rhs_val| { - if (rhs_val.isUndef()) - return self.constUndef(scope, src, Type.initTag(.bool)); - const is_unsigned = if (rhs_is_float) x: { - var bigint_space: Value.BigIntSpace = undefined; - var bigint = try rhs_val.toBigInt(&bigint_space).toManaged(self.allocator); - defer bigint.deinit(); - const zcmp = rhs_val.orderAgainstZero(); - if (rhs_val.floatHasFraction()) { - switch (op) { - .eq => return self.constBool(scope, src, false), - .neq => return self.constBool(scope, src, true), - else => {}, - } - if (zcmp == .lt) { - try bigint.addScalar(bigint.toConst(), -1); - } else { - try bigint.addScalar(bigint.toConst(), 1); - } - } - rhs_bits = bigint.toConst().bitCountTwosComp(); - break :x (zcmp != .lt); - } else x: { - rhs_bits = rhs_val.intBitCountTwosComp(); - break :x (rhs_val.orderAgainstZero() != .lt); - }; - rhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); - } else if (rhs_is_float) { - dest_float_type = rhs.ty; - } else { - const int_info = rhs.ty.intInfo(self.target()); - 
rhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed);
-        }
-
-        const dest_type = if (dest_float_type) |ft| ft else blk: {
-            const max_bits = std.math.max(lhs_bits, rhs_bits);
-            const casted_bits = std.math.cast(u16, max_bits) catch |err| switch (err) {
-                error.Overflow => return self.fail(scope, src, "{} exceeds maximum integer bit count", .{max_bits}),
-            };
-            break :blk try self.makeIntType(scope, dest_int_is_signed, casted_bits);
-        };
-        const casted_lhs = try self.coerce(scope, dest_type, lhs);
-        const casted_rhs = try self.coerce(scope, dest_type, rhs);
-
-        return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){
-            .lhs = casted_lhs,
-            .rhs = casted_rhs,
-            .op = op,
-        });
-    }
-
-    fn makeIntType(self: *Module, scope: *Scope, signed: bool, bits: u16) !Type {
-        if (signed) {
-            const int_payload = try scope.arena().create(Type.Payload.IntSigned);
-            int_payload.* = .{ .bits = bits };
-            return Type.initPayload(&int_payload.base);
-        } else {
-            const int_payload = try scope.arena().create(Type.Payload.IntUnsigned);
-            int_payload.* = .{ .bits = bits };
-            return Type.initPayload(&int_payload.base);
-        }
-    }
-
-    fn coerce(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
-        // If the types are the same, we can return the operand.
-        if (dest_type.eql(inst.ty))
-            return inst;
-
-        const in_memory_result = coerceInMemoryAllowed(dest_type, inst.ty);
-        if (in_memory_result == .ok) {
-            return self.bitcast(scope, dest_type, inst);
-        }
-
-        // *[N]T to []T
-        if (inst.ty.isSinglePointer() and dest_type.isSlice() and
-            (!inst.ty.pointerIsConst() or dest_type.pointerIsConst()))
-        {
-            const array_type = inst.ty.elemType();
-            const dst_elem_type = dest_type.elemType();
-            if (array_type.zigTypeTag() == .Array and
-                coerceInMemoryAllowed(dst_elem_type, array_type.elemType()) == .ok)
-            {
-                return self.coerceArrayPtrToSlice(scope, dest_type, inst);
-            }
-        }
-
-        // comptime_int to fixed-width integer
-        if (inst.ty.zigTypeTag() == .ComptimeInt and dest_type.zigTypeTag() == .Int) {
-            // The representation is already correct; we only need to make sure it fits in the destination type.
-            const val = inst.value().?; // comptime_int always has comptime known value
-            if (!val.intFitsInType(dest_type, self.target())) {
-                return self.fail(scope, inst.src, "type {} cannot represent integer value {}", .{ inst.ty, val });
-            }
-            return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val });
-        }
-
-        // integer widening
-        if (inst.ty.zigTypeTag() == .Int and dest_type.zigTypeTag() == .Int) {
-            const src_info = inst.ty.intInfo(self.target());
-            const dst_info = dest_type.intInfo(self.target());
-            if (src_info.signed == dst_info.signed and dst_info.bits >= src_info.bits) {
-                if (inst.value()) |val| {
-                    return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val });
-                } else {
-                    return self.fail(scope, inst.src, "TODO implement runtime integer widening", .{});
-                }
-            } else {
-                return self.fail(scope, inst.src, "TODO implement more int widening {} to {}", .{ inst.ty, dest_type });
-            }
-        }
-
-        return self.fail(scope, inst.src, "TODO implement type coercion from {} to {}", .{ inst.ty, dest_type });
-    }
-
-    fn bitcast(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
-        if (inst.value()) |val| {
-            // Keep the comptime Value representation; take the new type.
- return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); - } - // TODO validate the type size and other compile errors - const b = try self.requireRuntimeBlock(scope, inst.src); - return self.addNewInstArgs(b, inst.src, dest_type, Inst.BitCast, Inst.Args(Inst.BitCast){ .operand = inst }); - } - - fn coerceArrayPtrToSlice(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst { - if (inst.value()) |val| { - // The comptime Value representation is compatible with both types. - return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); - } - return self.fail(scope, inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{}); - } - - fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError { - @setCold(true); - try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); - try self.failed_files.ensureCapacity(self.failed_files.size + 1); - const err_msg = try ErrorMsg.create(self.allocator, src, format, args); - switch (scope.tag) { - .decl => { - const decl = scope.cast(Scope.DeclAnalysis).?.decl; - switch (decl.analysis) { - .initial_in_progress => decl.analysis = .initial_sema_failure, - .repeat_in_progress => decl.analysis = .repeat_sema_failure, - else => unreachable, - } - self.failed_decls.putAssumeCapacityNoClobber(decl, err_msg); - }, - .block => { - const block = scope.cast(Scope.Block).?; - block.func.analysis = .sema_failure; - self.failed_decls.putAssumeCapacityNoClobber(block.decl, err_msg); - }, - .zir_module => { - const zir_module = scope.cast(Scope.ZIRModule).?; - zir_module.status = .loaded_sema_failure; - self.failed_files.putAssumeCapacityNoClobber(zir_module, err_msg); - }, - } - return error.AnalysisFail; - } - - const InMemoryCoercionResult = enum { - ok, - no_match, - }; - - fn coerceInMemoryAllowed(dest_type: Type, src_type: Type) InMemoryCoercionResult { - if (dest_type.eql(src_type)) - return .ok; - - // TODO: implement more of this function - - return .no_match; - } -}; - -pub const ErrorMsg = struct { - byte_offset: usize, - msg: []const u8, - - pub fn create(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !*ErrorMsg { - const self = try allocator.create(ErrorMsg); - errdefer allocator.destroy(self); - self.* = try init(allocator, byte_offset, format, args); - return self; - } - - /// Assumes the ErrorMsg struct and msg were both allocated with allocator. 
- pub fn destroy(self: *ErrorMsg, allocator: *Allocator) void { - self.deinit(allocator); - allocator.destroy(self); - } - - pub fn init(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !ErrorMsg { - return ErrorMsg{ - .byte_offset = byte_offset, - .msg = try std.fmt.allocPrint(allocator, format, args), - }; - } - - pub fn deinit(self: *ErrorMsg, allocator: *Allocator) void { - allocator.free(self.msg); - self.* = undefined; - } -}; diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index f7237f4d60..8b24ef5335 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -3,6 +3,7 @@ const mem = std.mem; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ir = @import("ir.zig"); +const Module = @import("Module.zig"); const fs = std.fs; const elf = std.elf; const codegen = @import("codegen.zig"); @@ -45,8 +46,8 @@ pub fn writeFilePath( allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, - module: ir.Module, - errors: *std.ArrayList(ir.ErrorMsg), + module: Module, + errors: *std.ArrayList(Module.ErrorMsg), ) !void { const options: Options = .{ .target = module.target, @@ -755,7 +756,7 @@ pub const ElfFile = struct { }; } - pub fn allocateDeclIndexes(self: *ElfFile, decl: *ir.Module.Decl) !void { + pub fn allocateDeclIndexes(self: *ElfFile, decl: *Module.Decl) !void { if (decl.link.local_sym_index != 0) return; try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); @@ -784,7 +785,7 @@ pub const ElfFile = struct { }; } - pub fn updateDecl(self: *ElfFile, module: *ir.Module, decl: *ir.Module.Decl) !void { + pub fn updateDecl(self: *ElfFile, module: *Module, decl: *Module.Decl) !void { var code_buffer = std.ArrayList(u8).init(self.allocator); defer code_buffer.deinit(); @@ -878,16 +879,16 @@ pub const ElfFile = struct { try self.file.pwriteAll(code, file_offset); // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. - const decl_exports = module.decl_exports.getValue(decl) orelse &[0]*ir.Module.Export{}; + const decl_exports = module.decl_exports.getValue(decl) orelse &[0]*Module.Export{}; return self.updateDeclExports(module, decl, decl_exports); } /// Must be called only after a successful call to `updateDecl`. 
pub fn updateDeclExports( self: *ElfFile, - module: *ir.Module, - decl: *const ir.Module.Decl, - exports: []const *ir.Module.Export, + module: *Module, + decl: *const Module.Decl, + exports: []const *Module.Export, ) !void { try self.global_symbols.ensureCapacity(self.allocator, self.global_symbols.items.len + exports.len); const typed_value = decl.typed_value.most_recent.typed_value; @@ -900,7 +901,7 @@ pub const ElfFile = struct { try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); module.failed_exports.putAssumeCapacityNoClobber( exp, - try ir.ErrorMsg.create(self.allocator, 0, "Unimplemented: ExportOptions.section", .{}), + try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: ExportOptions.section", .{}), ); continue; } @@ -918,7 +919,7 @@ pub const ElfFile = struct { try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); module.failed_exports.putAssumeCapacityNoClobber( exp, - try ir.ErrorMsg.create(self.allocator, 0, "Unimplemented: GlobalLinkage.LinkOnce", .{}), + try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: GlobalLinkage.LinkOnce", .{}), ); continue; }, diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index 9215e37e8c..37825550da 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -6,9 +6,10 @@ const process = std.process; const Allocator = mem.Allocator; const ArrayList = std.ArrayList; const ast = std.zig.ast; -const ir = @import("ir.zig"); +const Module = @import("Module.zig"); const link = @import("link.zig"); const Package = @import("Package.zig"); +const zir = @import("zir.zig"); const LibCInstallation = @import("libc_installation.zig").LibCInstallation; @@ -438,7 +439,7 @@ fn buildOutputType( const root_pkg = try Package.create(gpa, fs.cwd(), ".", src_path); errdefer root_pkg.destroy(); - const root_scope = try gpa.create(ir.Module.Scope.ZIRModule); + const root_scope = try gpa.create(Module.Scope.ZIRModule); errdefer gpa.destroy(root_scope); root_scope.* = .{ .sub_file_path = root_pkg.root_src_path, @@ -447,19 +448,19 @@ fn buildOutputType( .status = .never_loaded, }; - break :blk ir.Module{ + break :blk Module{ .allocator = gpa, .root_pkg = root_pkg, .root_scope = root_scope, .bin_file = &bin_file, .optimize_mode = .Debug, - .decl_table = std.AutoHashMap(ir.Module.Decl.Hash, *ir.Module.Decl).init(gpa), - .decl_exports = std.AutoHashMap(*ir.Module.Decl, []*ir.Module.Export).init(gpa), - .export_owners = std.AutoHashMap(*ir.Module.Decl, []*ir.Module.Export).init(gpa), - .failed_decls = std.AutoHashMap(*ir.Module.Decl, *ir.ErrorMsg).init(gpa), - .failed_files = std.AutoHashMap(*ir.Module.Scope.ZIRModule, *ir.ErrorMsg).init(gpa), - .failed_exports = std.AutoHashMap(*ir.Module.Export, *ir.ErrorMsg).init(gpa), - .work_queue = std.fifo.LinearFifo(ir.Module.WorkItem, .Dynamic).init(gpa), + .decl_table = std.AutoHashMap(Module.Decl.Hash, *Module.Decl).init(gpa), + .decl_exports = std.AutoHashMap(*Module.Decl, []*Module.Export).init(gpa), + .export_owners = std.AutoHashMap(*Module.Decl, []*Module.Export).init(gpa), + .failed_decls = std.AutoHashMap(*Module.Decl, *Module.ErrorMsg).init(gpa), + .failed_files = std.AutoHashMap(*Module.Scope.ZIRModule, *Module.ErrorMsg).init(gpa), + .failed_exports = std.AutoHashMap(*Module.Export, *Module.ErrorMsg).init(gpa), + .work_queue = std.fifo.LinearFifo(Module.WorkItem, .Dynamic).init(gpa), }; }; defer module.deinit(); @@ -491,7 +492,7 @@ fn buildOutputType( } } -fn updateModule(gpa: *Allocator, module: *ir.Module, zir_out_path: ?[]const u8) 
!void { +fn updateModule(gpa: *Allocator, module: *Module, zir_out_path: ?[]const u8) !void { try module.update(); var errors = try module.getAllErrorsAlloc(); @@ -509,7 +510,7 @@ fn updateModule(gpa: *Allocator, module: *ir.Module, zir_out_path: ?[]const u8) } if (zir_out_path) |zop| { - var new_zir_module = try ir.text.emit_zir(gpa, module.*); + var new_zir_module = try zir.emit(gpa, module.*); defer new_zir_module.deinit(gpa); const baf = try io.BufferedAtomicFile.create(gpa, fs.cwd(), zop, .{}); diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 00db2b5ccc..df438360c8 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -6,7 +6,7 @@ const BigIntConst = std.math.big.int.Const; const BigIntMutable = std.math.big.int.Mutable; const Target = std.Target; const Allocator = std.mem.Allocator; -const ir = @import("ir.zig"); +const Module = @import("Module.zig"); /// This is the raw data, with no bookkeeping, no memory awareness, /// no de-duplication, and no type system awareness. @@ -904,7 +904,7 @@ pub const Value = extern union { pub const Function = struct { base: Payload = Payload{ .tag = .function }, - func: *ir.Module.Fn, + func: *Module.Fn, }; pub const ArraySentinel0_u8_Type = struct { @@ -926,7 +926,7 @@ pub const Value = extern union { /// Represents a pointer to a decl, not the value of the decl. pub const DeclRef = struct { base: Payload = Payload{ .tag = .decl_ref }, - decl: *ir.Module.Decl, + decl: *Module.Decl, }; pub const ElemPtr = struct { diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/zir.zig similarity index 99% rename from src-self-hosted/ir/text.zig rename to src-self-hosted/zir.zig index d8b5af4e2c..ba6c3299f6 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/zir.zig @@ -6,10 +6,11 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const BigIntConst = std.math.big.int.Const; const BigIntMutable = std.math.big.int.Mutable; -const Type = @import("../type.zig").Type; -const Value = @import("../value.zig").Value; -const TypedValue = @import("../TypedValue.zig"); -const ir = @import("../ir.zig"); +const Type = @import("type.zig").Type; +const Value = @import("value.zig").Value; +const TypedValue = @import("TypedValue.zig"); +const ir = @import("ir.zig"); +const IrModule = @import("Module.zig"); /// These are instructions that correspond to the ZIR text format. See `ir.Inst` for /// in-memory, analyzed instructions with types and values. 
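For reference, the ZIR text format that this module parses and emits looks like the
following fragment, taken verbatim from the test cases later in this series:

    @void = primitive(void)
    @usize = primitive(usize)
    @fnty = fntype([], @void, cc=C)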
@@ -990,7 +991,7 @@ const Parser = struct { } }; -pub fn emit_zir(allocator: *Allocator, old_module: ir.Module) !Module { +pub fn emit(allocator: *Allocator, old_module: IrModule) !Module { var ctx: EmitZIR = .{ .allocator = allocator, .decls = .{}, @@ -1013,7 +1014,7 @@ pub fn emit_zir(allocator: *Allocator, old_module: ir.Module) !Module { const EmitZIR = struct { allocator: *Allocator, arena: std.heap.ArenaAllocator, - old_module: *const ir.Module, + old_module: *const IrModule, decls: std.ArrayListUnmanaged(*Inst), decl_table: std.AutoHashMap(*ir.Inst, *Inst), @@ -1171,7 +1172,7 @@ const EmitZIR = struct { fn emitBody( self: *EmitZIR, - body: ir.Module.Body, + body: IrModule.Body, inst_table: *std.AutoHashMap(*ir.Inst, *Inst), instructions: *std.ArrayList(*Inst), ) Allocator.Error!void { From 294bfb3321fe82371d57d39137615dddfef9af4b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 15 May 2020 23:54:13 -0400 Subject: [PATCH 18/31] stage2 zir tests passing --- src-self-hosted/Module.zig | 54 +++++++++++++++++++-- src-self-hosted/main.zig | 49 ++++++------------- src-self-hosted/test.zig | 96 +++++++++++++++----------------------- 3 files changed, 102 insertions(+), 97 deletions(-) diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index ef85f50e31..10127627ca 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -18,12 +18,11 @@ const Inst = ir.Inst; /// General-purpose allocator. allocator: *Allocator, -/// Module owns this resource. +/// Pointer to externally managed resource. root_pkg: *Package, /// Module owns this resource. root_scope: *Scope.ZIRModule, -/// Pointer to externally managed resource. -bin_file: *link.ElfFile, +bin_file: link.ElfFile, /// It's rare for a decl to be exported, so we save memory by having a sparse map of /// Decl pointers to details about them being exported. /// The Export memory is owned by the `export_owners` table; the slice itself is owned by this table. 
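To make the ownership comment above concrete, here is a minimal sketch of the deinit
obligations it implies (hypothetical code, not part of any commit in this series;
`module` and `gpa` are assumed to be in scope, and the tables are the
`std.AutoHashMap(*Decl, []*Export)` fields initialized below):

    // export_owners owns the Export allocations and its slices, so the
    // Export memory is destroyed here, exactly once.
    var owners_it = module.export_owners.iterator();
    while (owners_it.next()) |kv| {
        for (kv.value) |exp| gpa.destroy(exp);
        gpa.free(kv.value);
    }
    // decl_exports owns only its slices; its *Export pointers alias the
    // allocations freed above.
    var exports_it = module.decl_exports.iterator();
    while (exports_it.next()) |kv| gpa.free(kv.value);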
@@ -422,7 +421,55 @@ pub const AllErrors = struct { } }; +pub const InitOptions = struct { + target: std.Target, + root_pkg: *Package, + output_mode: std.builtin.OutputMode, + bin_file_dir: ?std.fs.Dir = null, + bin_file_path: []const u8, + link_mode: ?std.builtin.LinkMode = null, + object_format: ?std.builtin.ObjectFormat = null, + optimize_mode: std.builtin.Mode = .Debug, +}; + +pub fn init(gpa: *Allocator, options: InitOptions) !Module { + const root_scope = try gpa.create(Scope.ZIRModule); + errdefer gpa.destroy(root_scope); + + root_scope.* = .{ + .sub_file_path = options.root_pkg.root_src_path, + .source = .{ .unloaded = {} }, + .contents = .{ .not_available = {} }, + .status = .never_loaded, + }; + + const bin_file_dir = options.bin_file_dir orelse std.fs.cwd(); + var bin_file = try link.openBinFilePath(gpa, bin_file_dir, options.bin_file_path, .{ + .target = options.target, + .output_mode = options.output_mode, + .link_mode = options.link_mode orelse .Static, + .object_format = options.object_format orelse options.target.getObjectFormat(), + }); + errdefer bin_file.deinit(); + + return Module{ + .allocator = gpa, + .root_pkg = options.root_pkg, + .root_scope = root_scope, + .bin_file = bin_file, + .optimize_mode = options.optimize_mode, + .decl_table = std.AutoHashMap(Decl.Hash, *Decl).init(gpa), + .decl_exports = std.AutoHashMap(*Decl, []*Export).init(gpa), + .export_owners = std.AutoHashMap(*Decl, []*Export).init(gpa), + .failed_decls = std.AutoHashMap(*Decl, *ErrorMsg).init(gpa), + .failed_files = std.AutoHashMap(*Scope.ZIRModule, *ErrorMsg).init(gpa), + .failed_exports = std.AutoHashMap(*Export, *ErrorMsg).init(gpa), + .work_queue = std.fifo.LinearFifo(WorkItem, .Dynamic).init(gpa), + }; +} + pub fn deinit(self: *Module) void { + self.bin_file.deinit(); const allocator = self.allocator; self.work_queue.deinit(); { @@ -472,7 +519,6 @@ pub fn deinit(self: *Module) void { } self.export_owners.deinit(); } - self.root_pkg.destroy(); { self.root_scope.deinit(allocator); allocator.destroy(self.root_scope); diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index 37825550da..162e535a05 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -157,7 +157,7 @@ fn buildOutputType( var color: Color = .Auto; var build_mode: std.builtin.Mode = .Debug; var provided_name: ?[]const u8 = null; - var is_dynamic = false; + var link_mode: ?std.builtin.LinkMode = null; var root_src_file: ?[]const u8 = null; var version: std.builtin.Version = .{ .major = 0, .minor = 0, .patch = 0 }; var strip = false; @@ -286,7 +286,9 @@ fn buildOutputType( } else if (mem.eql(u8, arg, "-fno-emit-zir")) { emit_zir = .no; } else if (mem.eql(u8, arg, "-dynamic")) { - is_dynamic = true; + link_mode = .Dynamic; + } else if (mem.eql(u8, arg, "-static")) { + link_mode = .Static; } else if (mem.eql(u8, arg, "--strip")) { strip = true; } else if (mem.eql(u8, arg, "--debug-tokenize")) { @@ -427,42 +429,19 @@ fn buildOutputType( .yes => |p| p, }; - var bin_file = try link.openBinFilePath(gpa, fs.cwd(), bin_path, .{ + const root_pkg = try Package.create(gpa, fs.cwd(), ".", src_path); + defer root_pkg.destroy(); + + var module = try Module.init(gpa, .{ .target = target_info.target, .output_mode = output_mode, - .link_mode = if (is_dynamic) .Dynamic else .Static, - .object_format = object_format orelse target_info.target.getObjectFormat(), + .root_pkg = root_pkg, + .bin_file_dir = fs.cwd(), + .bin_file_path = bin_path, + .link_mode = link_mode, + .object_format = object_format, + .optimize_mode = 
build_mode, }); - defer bin_file.deinit(); - - var module = blk: { - const root_pkg = try Package.create(gpa, fs.cwd(), ".", src_path); - errdefer root_pkg.destroy(); - - const root_scope = try gpa.create(Module.Scope.ZIRModule); - errdefer gpa.destroy(root_scope); - root_scope.* = .{ - .sub_file_path = root_pkg.root_src_path, - .source = .{ .unloaded = {} }, - .contents = .{ .not_available = {} }, - .status = .never_loaded, - }; - - break :blk Module{ - .allocator = gpa, - .root_pkg = root_pkg, - .root_scope = root_scope, - .bin_file = &bin_file, - .optimize_mode = .Debug, - .decl_table = std.AutoHashMap(Module.Decl.Hash, *Module.Decl).init(gpa), - .decl_exports = std.AutoHashMap(*Module.Decl, []*Module.Export).init(gpa), - .export_owners = std.AutoHashMap(*Module.Decl, []*Module.Export).init(gpa), - .failed_decls = std.AutoHashMap(*Module.Decl, *Module.ErrorMsg).init(gpa), - .failed_files = std.AutoHashMap(*Module.Scope.ZIRModule, *Module.ErrorMsg).init(gpa), - .failed_exports = std.AutoHashMap(*Module.Export, *Module.ErrorMsg).init(gpa), - .work_queue = std.fifo.LinearFifo(Module.WorkItem, .Dynamic).init(gpa), - }; - }; defer module.deinit(); const stdin = std.io.getStdIn().inStream(); diff --git a/src-self-hosted/test.zig b/src-self-hosted/test.zig index bac016e1a4..acfd7a0811 100644 --- a/src-self-hosted/test.zig +++ b/src-self-hosted/test.zig @@ -1,7 +1,9 @@ const std = @import("std"); const link = @import("link.zig"); -const ir = @import("ir.zig"); +const Module = @import("Module.zig"); const Allocator = std.mem.Allocator; +const zir = @import("zir.zig"); +const Package = @import("Package.zig"); test "self-hosted" { var ctx: TestContext = undefined; @@ -98,52 +100,31 @@ pub const TestContext = struct { var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); - var prg_node = root_node.start(case.name, 4); + var prg_node = root_node.start(case.name, 2); prg_node.activate(); defer prg_node.end(); - var zir_module = x: { - var parse_node = prg_node.start("parse", null); - parse_node.activate(); - defer parse_node.end(); + const tmp_src_path = "test-case.zir"; + try tmp.dir.writeFile(tmp_src_path, case.src); - break :x try ir.text.parse(allocator, case.src); - }; - defer zir_module.deinit(allocator); - if (zir_module.errors.len != 0) { - debugPrintErrors(case.src, zir_module.errors); - return error.ParseFailure; - } + const root_pkg = try Package.create(allocator, tmp.dir, ".", tmp_src_path); + defer root_pkg.destroy(); - var analyzed_module = x: { - var analyze_node = prg_node.start("analyze", null); - analyze_node.activate(); - defer analyze_node.end(); - - break :x try ir.analyze(allocator, zir_module, .{ + { + var module = try Module.init(allocator, .{ .target = target, .output_mode = .Exe, - .link_mode = .Static, .optimize_mode = .Debug, + .bin_file_dir = tmp.dir, + .bin_file_path = "a.out", + .root_pkg = root_pkg, }); - }; - defer analyzed_module.deinit(allocator); - if (analyzed_module.errors.len != 0) { - debugPrintErrors(case.src, analyzed_module.errors); - return error.ParseFailure; - } + defer module.deinit(); - var link_result = x: { - var link_node = prg_node.start("link", null); - link_node.activate(); - defer link_node.end(); - - break :x try link.updateFilePath(allocator, analyzed_module, tmp.dir, "a.out"); - }; - defer link_result.deinit(allocator); - if (link_result.errors.len != 0) { - debugPrintErrors(case.src, link_result.errors); - return error.LinkFailure; + var module_node = prg_node.start("parse,analysis,codegen", null); + module_node.activate(); + try 
module.update(); + module_node.end(); } var exec_result = x: { @@ -178,38 +159,37 @@ pub const TestContext = struct { case: ZIRTransformCase, target: std.Target, ) !void { - var prg_node = root_node.start(case.name, 4); + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + var prg_node = root_node.start(case.name, 3); prg_node.activate(); defer prg_node.end(); - var parse_node = prg_node.start("parse", null); - parse_node.activate(); - var zir_module = try ir.text.parse(allocator, case.src); - defer zir_module.deinit(allocator); - if (zir_module.errors.len != 0) { - debugPrintErrors(case.src, zir_module.errors); - return error.ParseFailure; - } - parse_node.end(); + const tmp_src_path = "test-case.zir"; + try tmp.dir.writeFile(tmp_src_path, case.src); - var analyze_node = prg_node.start("analyze", null); - analyze_node.activate(); - var analyzed_module = try ir.analyze(allocator, zir_module, .{ + const root_pkg = try Package.create(allocator, tmp.dir, ".", tmp_src_path); + defer root_pkg.destroy(); + + var module = try Module.init(allocator, .{ .target = target, .output_mode = .Obj, - .link_mode = .Static, .optimize_mode = .Debug, + .bin_file_dir = tmp.dir, + .bin_file_path = "test-case.o", + .root_pkg = root_pkg, }); - defer analyzed_module.deinit(allocator); - if (analyzed_module.errors.len != 0) { - debugPrintErrors(case.src, analyzed_module.errors); - return error.ParseFailure; - } - analyze_node.end(); + defer module.deinit(); + + var module_node = prg_node.start("parse/analysis/codegen", null); + module_node.activate(); + try module.update(); + module_node.end(); var emit_node = prg_node.start("emit", null); emit_node.activate(); - var new_zir_module = try ir.text.emit_zir(allocator, analyzed_module); + var new_zir_module = try zir.emit(allocator, module); defer new_zir_module.deinit(allocator); emit_node.end(); From a286b5de38617809db58f918a81a650b41fbdd49 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 16 May 2020 01:22:56 -0400 Subject: [PATCH 19/31] build.zig: -Dlib-files-only skips installing self-hosted --- build.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build.zig b/build.zig index b57ae69638..3636da4f28 100644 --- a/build.zig +++ b/build.zig @@ -53,7 +53,6 @@ pub fn build(b: *Builder) !void { exe.setBuildMode(mode); test_step.dependOn(&exe.step); b.default_step.dependOn(&exe.step); - exe.install(); const skip_release = b.option(bool, "skip-release", "Main test suite skips release builds") orelse false; const skip_release_small = b.option(bool, "skip-release-small", "Main test suite skips release-small builds") orelse skip_release; @@ -70,6 +69,9 @@ pub fn build(b: *Builder) !void { try configureStage2(b, exe, ctx); } + if (!only_install_lib_files) { + exe.install(); + } const link_libc = b.option(bool, "force-link-libc", "Force self-hosted compiler to link libc") orelse false; if (link_libc) exe.linkLibC(); From cd5f69794d63ece18cd8f8aa0e2ce8bc16a31ab7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 16 May 2020 12:19:31 -0400 Subject: [PATCH 20/31] cross compile the stage2 tests for the target that they work for --- src-self-hosted/test.zig | 6 +++++- test/stage2/zir.zig | 14 +++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src-self-hosted/test.zig b/src-self-hosted/test.zig index acfd7a0811..9bb0b8846b 100644 --- a/src-self-hosted/test.zig +++ b/src-self-hosted/test.zig @@ -29,6 +29,7 @@ pub const TestContext = struct { name: []const u8, src: [:0]const u8, expected_zir: []const u8, + 
cross_target: std.zig.CrossTarget, }; pub fn addZIRCompareOutput( @@ -47,6 +48,7 @@ pub const TestContext = struct { pub fn addZIRTransform( ctx: *TestContext, name: []const u8, + cross_target: std.zig.CrossTarget, src: [:0]const u8, expected_zir: []const u8, ) void { @@ -54,6 +56,7 @@ pub const TestContext = struct { .name = name, .src = src, .expected_zir = expected_zir, + .cross_target = cross_target, }) catch unreachable; } @@ -85,7 +88,8 @@ pub const TestContext = struct { } for (self.zir_transform_cases.items) |case| { std.testing.base_allocator_instance.reset(); - try self.runOneZIRTransformCase(std.testing.allocator, root_node, case, native_info.target); + const info = try std.zig.system.NativeTargetInfo.detect(std.testing.allocator, case.cross_target); + try self.runOneZIRTransformCase(std.testing.allocator, root_node, case, info.target); try std.testing.allocator_instance.validate(); } } diff --git a/test/stage2/zir.zig b/test/stage2/zir.zig index 78b6d3c1b2..868ded42ed 100644 --- a/test/stage2/zir.zig +++ b/test/stage2/zir.zig @@ -1,7 +1,15 @@ +const std = @import("std"); const TestContext = @import("../../src-self-hosted/test.zig").TestContext; +// self-hosted does not yet support PE executable files / COFF object files +// or mach-o files. So we do the ZIR transform test cases cross compiling for +// x86_64-linux. +const linux_x64 = std.zig.CrossTarget{ + .cpu_arch = .x86_64, + .os_tag = .linux, +}; pub fn addCases(ctx: *TestContext) void { - ctx.addZIRTransform("elemptr, add, cmp, condbr, return, breakpoint", + ctx.addZIRTransform("elemptr, add, cmp, condbr, return, breakpoint", linux_x64, \\@void = primitive(void) \\@usize = primitive(usize) \\@fnty = fntype([], @void, cc=C) @@ -49,8 +57,8 @@ pub fn addCases(ctx: *TestContext) void { \\ ); - if (@import("std").Target.current.os.tag != .linux or - @import("std").Target.current.cpu.arch != .x86_64) + if (std.Target.current.os.tag != .linux or + std.Target.current.cpu.arch != .x86_64) { // TODO implement self-hosted PE (.exe file) linking // TODO implement more ZIR so we don't depend on x86_64-linux From b0375978ba54bc80c691eaf118f3a78e7f8920a7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 16 May 2020 13:25:39 -0400 Subject: [PATCH 21/31] self-hosted: remove `zig libc` command for now Since it depends on the C++ Windows SDK code. For now, self-hosted is staying pure self hosted, no C/C++ components. --- src-self-hosted/main.zig | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index 162e535a05..40d046a256 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -11,8 +11,6 @@ const link = @import("link.zig"); const Package = @import("Package.zig"); const zir = @import("zir.zig"); -const LibCInstallation = @import("libc_installation.zig").LibCInstallation; - // TODO Improve async I/O enough that we feel comfortable doing this. 
//pub const io_mode = .evented; @@ -33,7 +31,6 @@ const usage = \\ build-lib [source] Create library from source or object files \\ build-obj [source] Create object from source or assembly \\ fmt [source] Parse file and render in canonical zig format - \\ libc [paths_file] Display native libc paths file or validate one \\ targets List available compilation targets \\ version Print version number and exit \\ zen Print zen of zig and exit @@ -65,8 +62,6 @@ pub fn main() !void { return buildOutputType(gpa, arena, cmd_args, .Obj); } else if (mem.eql(u8, cmd, "fmt")) { return cmdFmt(gpa, cmd_args); - } else if (mem.eql(u8, cmd, "libc")) { - return cmdLibC(gpa, cmd_args); } else if (mem.eql(u8, cmd, "targets")) { const info = try std.zig.system.NativeTargetInfo.detect(arena, .{}); const stdout = io.getStdOut().outStream(); @@ -535,41 +530,6 @@ const Fmt = struct { const SeenMap = std.BufSet; }; -fn parseLibcPaths(gpa: *Allocator, libc: *LibCInstallation, libc_paths_file: []const u8) void { - const stderr = io.getStdErr().outStream(); - libc.* = LibCInstallation.parse(gpa, libc_paths_file, stderr) catch |err| { - stderr.print("Unable to parse libc path file '{}': {}.\n" ++ - "Try running `zig libc` to see an example for the native target.\n", .{ - libc_paths_file, - @errorName(err), - }) catch {}; - process.exit(1); - }; -} - -fn cmdLibC(gpa: *Allocator, args: []const []const u8) !void { - const stderr = io.getStdErr().outStream(); - switch (args.len) { - 0 => {}, - 1 => { - var libc_installation: LibCInstallation = undefined; - parseLibcPaths(gpa, &libc_installation, args[0]); - return; - }, - else => { - try stderr.print("unexpected extra parameter: {}\n", .{args[1]}); - process.exit(1); - }, - } - - const libc = LibCInstallation.findNative(.{ .allocator = gpa }) catch |err| { - stderr.print("unable to find libc: {}\n", .{@errorName(err)}) catch {}; - process.exit(1); - }; - - libc.render(io.getStdOut().outStream()) catch process.exit(1); -} - pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void { const stderr_file = io.getStdErr(); var color: Color = .Auto; From 017ecc5148da3f3f50f5666d635c22dfb6bfffb2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 16 May 2020 15:44:20 -0400 Subject: [PATCH 22/31] self hosted repl: close executables between updates This allows the executable to be executed --- src-self-hosted/Module.zig | 18 ++++++++++- src-self-hosted/link.zig | 65 ++++++++++++++++++++++++-------------- src-self-hosted/main.zig | 6 ++++ 3 files changed, 65 insertions(+), 24 deletions(-) diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 10127627ca..72a4e1b410 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -23,6 +23,8 @@ root_pkg: *Package, /// Module owns this resource. root_scope: *Scope.ZIRModule, bin_file: link.ElfFile, +bin_file_dir: std.fs.Dir, +bin_file_path: []const u8, /// It's rare for a decl to be exported, so we save memory by having a sparse map of /// Decl pointers to details about them being exported. /// The Export memory is owned by the `export_owners` table; the slice itself is owned by this table. 
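The two new fields above let the module reopen its own output binary between updates;
the functions added below use them. The intended flow (a sketch, with error handling
elided) is:

    try module.makeBinFileExecutable(); // close the file so the OS will run it
    // ... the user runs the binary, then requests another update ...
    try module.makeBinFileWritable(); // reopen it for incremental writes
    try module.update();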
@@ -456,6 +458,8 @@ pub fn init(gpa: *Allocator, options: InitOptions) !Module {
         .allocator = gpa,
         .root_pkg = options.root_pkg,
         .root_scope = root_scope,
+        .bin_file_dir = bin_file_dir,
+        .bin_file_path = options.bin_file_path,
         .bin_file = bin_file,
         .optimize_mode = options.optimize_mode,
         .decl_table = std.AutoHashMap(Decl.Hash, *Decl).init(gpa),
@@ -551,6 +555,18 @@ pub fn update(self: *Module) !void {
     self.link_error_flags = self.bin_file.error_flags;
 }
 
+/// Having the file open for writing is problematic as far as executing the
+/// binary is concerned. This will remove the write flag, or close the file,
+/// or whatever is needed so that it can be executed.
+/// After this, one must call `makeBinFileWritable` before calling `update`.
+pub fn makeBinFileExecutable(self: *Module) !void {
+    return self.bin_file.makeExecutable();
+}
+
+pub fn makeBinFileWritable(self: *Module) !void {
+    return self.bin_file.makeWritable(self.bin_file_dir, self.bin_file_path);
+}
+
 pub fn totalErrorCount(self: *Module) usize {
     return self.failed_decls.size +
         self.failed_files.size +
@@ -759,7 +775,7 @@ fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void {
             const new_contents_hash = Decl.hashSimpleName(src_decl.contents);
             if (!mem.eql(u8, &new_contents_hash, &decl.contents_hash)) {
                 // TODO recursive dependency management
-                std.debug.warn("noticed that '{}' changed\n", .{src_decl.name});
+                //std.debug.warn("noticed that '{}' changed\n", .{src_decl.name});
                 self.decl_table.removeAssertDiscard(name_hash);
                 const saved_link = decl.link;
                 decl.destroy(self.allocator);
diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig
index 8b24ef5335..a0c7ec8493 100644
--- a/src-self-hosted/link.zig
+++ b/src-self-hosted/link.zig
@@ -91,7 +91,7 @@ pub fn openBinFile(allocator: *Allocator, file: fs.File, options: Options) !ElfF
 
 pub const ElfFile = struct {
     allocator: *Allocator,
-    file: fs.File,
+    file: ?fs.File,
     owns_file_handle: bool,
     options: Options,
     ptr_width: enum { p32, p64 },
@@ -170,8 +170,27 @@
         self.local_symbols.deinit(self.allocator);
         self.global_symbols.deinit(self.allocator);
         self.offset_table.deinit(self.allocator);
-        if (self.owns_file_handle)
-            self.file.close();
+        if (self.owns_file_handle) {
+            if (self.file) |f| f.close();
+        }
     }
+
+    pub fn makeExecutable(self: *ElfFile) !void {
+        assert(self.owns_file_handle);
+        if (self.file) |f| {
+            f.close();
+            self.file = null;
+        }
+    }
+
+    pub fn makeWritable(self: *ElfFile, dir: fs.Dir, sub_path: []const u8) !void {
+        assert(self.owns_file_handle);
+        if (self.file != null) return;
+        self.file = try dir.createFile(sub_path, .{
+            .truncate = false,
+            .read = true,
+            .mode = determineMode(self.options),
+        });
+    }
 
     // `alloc_num / alloc_den` is the factor of padding when allocation
@@ -467,7 +486,7 @@
                         bswapAllFields(elf.Elf32_Phdr, phdr);
                     }
                 }
-                try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?);
+                try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?);
             },
             .p64 => {
                 const buf = try self.allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len);
@@ -479,7 +498,7 @@
                         bswapAllFields(elf.Elf64_Phdr, phdr);
                     }
                 }
-                try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?);
+                try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?);
             },
         }
         self.phdr_table_dirty = false;
@@ -498,7 +517,7 @@
         shstrtab_sect.sh_size = needed_size;
         //std.debug.warn("shstrtab start=0x{x}
end=0x{x}\n", .{ shstrtab_sect.sh_offset, shstrtab_sect.sh_offset + needed_size }); - try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset); + try self.file.?.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset); if (!self.shdr_table_dirty) { // Then it won't get written with the others and we need to do it. try self.writeSectHeader(self.shstrtab_index.?); @@ -534,7 +553,7 @@ pub const ElfFile = struct { bswapAllFields(elf.Elf32_Shdr, shdr); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); }, .p64 => { const buf = try self.allocator.alloc(elf.Elf64_Shdr, self.sections.items.len); @@ -547,7 +566,7 @@ pub const ElfFile = struct { bswapAllFields(elf.Elf64_Shdr, shdr); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); }, } self.shdr_table_dirty = false; @@ -687,7 +706,7 @@ pub const ElfFile = struct { assert(index == e_ehsize); - try self.file.pwriteAll(hdr_buf[0..index], 0); + try self.file.?.pwriteAll(hdr_buf[0..index], 0); } const AllocatedBlock = struct { @@ -718,7 +737,7 @@ pub const ElfFile = struct { // Must move the entire text section. const new_offset = self.findFreeSpace(needed_size, 0x1000); const text_size = (last_start + last_size) - phdr.p_vaddr; - const amt = try self.file.copyRangeAll(shdr.sh_offset, self.file, new_offset, text_size); + const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, text_size); if (amt != text_size) return error.InputOutput; shdr.sh_offset = new_offset; } @@ -876,7 +895,7 @@ pub const ElfFile = struct { } }; - try self.file.pwriteAll(code, file_offset); + try self.file.?.pwriteAll(code, file_offset); // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. const decl_exports = module.decl_exports.getValue(decl) orelse &[0]*Module.Export{}; @@ -962,14 +981,14 @@ pub const ElfFile = struct { if (foreign_endian) { bswapAllFields(elf.Elf32_Phdr, &phdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&phdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, 64 => { var phdr = [1]elf.Elf64_Phdr{self.program_headers.items[index]}; if (foreign_endian) { bswapAllFields(elf.Elf64_Phdr, &phdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&phdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, else => return error.UnsupportedArchitecture, } @@ -985,14 +1004,14 @@ pub const ElfFile = struct { if (foreign_endian) { bswapAllFields(elf.Elf32_Shdr, &shdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&shdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); }, 64 => { var shdr = [1]elf.Elf64_Shdr{self.sections.items[index]}; if (foreign_endian) { bswapAllFields(elf.Elf64_Shdr, &shdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&shdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); }, else => return error.UnsupportedArchitecture, } @@ -1012,7 +1031,7 @@ pub const ElfFile = struct { if (needed_size > allocated_size) { // Must move the entire got section. 
const new_offset = self.findFreeSpace(needed_size, entry_size); - const amt = try self.file.copyRangeAll(shdr.sh_offset, self.file, new_offset, shdr.sh_size); + const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, shdr.sh_size); if (amt != shdr.sh_size) return error.InputOutput; shdr.sh_offset = new_offset; } @@ -1031,12 +1050,12 @@ pub const ElfFile = struct { .p32 => { var buf: [4]u8 = undefined; mem.writeInt(u32, &buf, @intCast(u32, self.offset_table.items[index]), endian); - try self.file.pwriteAll(&buf, off); + try self.file.?.pwriteAll(&buf, off); }, .p64 => { var buf: [8]u8 = undefined; mem.writeInt(u64, &buf, self.offset_table.items[index], endian); - try self.file.pwriteAll(&buf, off); + try self.file.?.pwriteAll(&buf, off); }, } } @@ -1059,7 +1078,7 @@ pub const ElfFile = struct { // Move all the symbols to a new file location. const new_offset = self.findFreeSpace(needed_size, sym_align); const existing_size = @as(u64, syms_sect.sh_info) * sym_size; - const amt = try self.file.copyRangeAll(syms_sect.sh_offset, self.file, new_offset, existing_size); + const amt = try self.file.?.copyRangeAll(syms_sect.sh_offset, self.file.?, new_offset, existing_size); if (amt != existing_size) return error.InputOutput; syms_sect.sh_offset = new_offset; } @@ -1084,7 +1103,7 @@ pub const ElfFile = struct { bswapAllFields(elf.Elf32_Sym, &sym[0]); } const off = syms_sect.sh_offset + @sizeOf(elf.Elf32_Sym) * index; - try self.file.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); + try self.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); }, .p64 => { var sym = [1]elf.Elf64_Sym{self.local_symbols.items[index]}; @@ -1092,7 +1111,7 @@ pub const ElfFile = struct { bswapAllFields(elf.Elf64_Sym, &sym[0]); } const off = syms_sect.sh_offset + @sizeOf(elf.Elf64_Sym) * index; - try self.file.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); + try self.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); }, } } @@ -1124,7 +1143,7 @@ pub const ElfFile = struct { bswapAllFields(elf.Elf32_Sym, sym); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); }, .p64 => { const buf = try self.allocator.alloc(elf.Elf64_Sym, self.global_symbols.items.len); @@ -1143,7 +1162,7 @@ pub const ElfFile = struct { bswapAllFields(elf.Elf64_Sym, sym); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); }, } } diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index 40d046a256..4ef4acc24b 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -447,11 +447,17 @@ fn buildOutputType( while (watch) { try stderr.print("🦎 ", .{}); + if (output_mode == .Exe) { + try module.makeBinFileExecutable(); + } if (stdin.readUntilDelimiterOrEof(&repl_buf, '\n') catch |err| { try stderr.print("\nUnable to parse command: {}\n", .{@errorName(err)}); continue; }) |line| { if (mem.eql(u8, line, "update")) { + if (output_mode == .Exe) { + try module.makeBinFileWritable(); + } try updateModule(gpa, &module, zir_out_path); } else if (mem.eql(u8, line, "exit")) { break; From 54820a3005f25e1e542d8add39f184ed1e1eddba Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 16 May 2020 20:23:15 -0400 Subject: [PATCH 23/31] fix source not being loaded when printing errors --- src-self-hosted/Module.zig | 73 ++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/src-self-hosted/Module.zig 
b/src-self-hosted/Module.zig index 72a4e1b410..1ff551ecbf 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -307,7 +307,7 @@ pub const Scope = struct { /// Relative to the owning package's root_src_dir. /// Reference to external memory, not owned by ZIRModule. sub_file_path: []const u8, - source: union { + source: union(enum) { unloaded: void, bytes: [:0]const u8, }, @@ -320,7 +320,7 @@ pub const Scope = struct { unloaded_success, unloaded_parse_failure, unloaded_sema_failure, - loaded_parse_failure, + loaded_sema_failure, loaded_success, }, @@ -334,21 +334,22 @@ pub const Scope = struct { => {}, .loaded_success => { - allocator.free(self.source.bytes); self.contents.module.deinit(allocator); allocator.destroy(self.contents.module); self.status = .unloaded_success; }, .loaded_sema_failure => { - allocator.free(self.source.bytes); self.contents.module.deinit(allocator); allocator.destroy(self.contents.module); self.status = .unloaded_sema_failure; }, - .loaded_parse_failure => { - allocator.free(self.source.bytes); - self.status = .unloaded_parse_failure; + } + switch (self.source) { + .bytes => |bytes| { + allocator.free(bytes); + self.source = .{ .unloaded = {} }; }, + .unloaded => {}, } } @@ -586,7 +587,7 @@ pub fn getAllErrorsAlloc(self: *Module) !AllErrors { while (it.next()) |kv| { const scope = kv.key; const err_msg = kv.value; - const source = scope.source.bytes; + const source = try self.getSource(scope); try AllErrors.add(&arena, &errors, scope.sub_file_path, source, err_msg.*); } } @@ -595,7 +596,7 @@ pub fn getAllErrorsAlloc(self: *Module) !AllErrors { while (it.next()) |kv| { const decl = kv.key; const err_msg = kv.value; - const source = decl.scope.source.bytes; + const source = try self.getSource(decl.scope); try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); } } @@ -604,7 +605,7 @@ pub fn getAllErrorsAlloc(self: *Module) !AllErrors { while (it.next()) |kv| { const decl = kv.key.owner_decl; const err_msg = kv.value; - const source = decl.scope.source.bytes; + const source = try self.getSource(decl.scope); try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*); } } @@ -684,20 +685,29 @@ pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void { }; } +fn getSource(self: *Module, root_scope: *Scope.ZIRModule) ![:0]const u8 { + switch (root_scope.source) { + .unloaded => { + const source = try self.root_pkg.root_src_dir.readFileAllocOptions( + self.allocator, + root_scope.sub_file_path, + std.math.maxInt(u32), + 1, + 0, + ); + root_scope.source = .{ .bytes = source }; + return source; + }, + .bytes => |bytes| return bytes, + } +} + fn getSrcModule(self: *Module, root_scope: *Scope.ZIRModule) !*zir.Module { switch (root_scope.status) { .never_loaded, .unloaded_success => { try self.failed_files.ensureCapacity(self.failed_files.size + 1); - var keep_source = false; - const source = try self.root_pkg.root_src_dir.readFileAllocOptions( - self.allocator, - self.root_pkg.root_src_path, - std.math.maxInt(u32), - 1, - 0, - ); - defer if (!keep_source) self.allocator.free(source); + const source = try self.getSource(root_scope); var keep_zir_module = false; const zir_module = try self.allocator.create(zir.Module); @@ -711,15 +721,11 @@ fn getSrcModule(self: *Module, root_scope: *Scope.ZIRModule) !*zir.Module { root_scope, try ErrorMsg.create(self.allocator, src_err_msg.byte_offset, "{}", .{src_err_msg.msg}), ); - root_scope.status = .loaded_parse_failure; - root_scope.source = .{ .bytes = source }; - 
keep_source = true; + root_scope.status = .unloaded_parse_failure; return error.AnalysisFail; } root_scope.status = .loaded_success; - root_scope.source = .{ .bytes = source }; - keep_source = true; root_scope.contents = .{ .module = zir_module }; keep_zir_module = true; @@ -728,10 +734,9 @@ fn getSrcModule(self: *Module, root_scope: *Scope.ZIRModule) !*zir.Module { .unloaded_parse_failure, .unloaded_sema_failure, - .loaded_parse_failure, - .loaded_sema_failure, => return error.AnalysisFail, - .loaded_success => return root_scope.contents.module, + + .loaded_success, .loaded_sema_failure => return root_scope.contents.module, } } @@ -760,10 +765,9 @@ fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void { .unloaded_parse_failure, .unloaded_sema_failure, - .loaded_parse_failure, + .unloaded_success, .loaded_sema_failure, .loaded_success, - .unloaded_success, => { const src_module = try self.getSrcModule(root_scope); @@ -2008,9 +2012,16 @@ fn coerceArrayPtrToSlice(self: *Module, scope: *Scope, dest_type: Type, inst: *I fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError { @setCold(true); - try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); - try self.failed_files.ensureCapacity(self.failed_files.size + 1); const err_msg = try ErrorMsg.create(self.allocator, src, format, args); + return self.failWithOwnedErrorMsg(scope, src, err_msg); +} + +fn failWithOwnedErrorMsg(self: *Module, scope: *Scope, src: usize, err_msg: *ErrorMsg) InnerError { + { + errdefer err_msg.destroy(self.allocator); + try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + try self.failed_files.ensureCapacity(self.failed_files.size + 1); + } switch (scope.tag) { .decl => { const decl = scope.cast(Scope.DeclAnalysis).?.decl; From 13ea698a40acb88063e7e6bd633fc2848d3d3a05 Mon Sep 17 00:00:00 2001 From: Noam Preil Date: Sat, 16 May 2020 18:16:49 -0400 Subject: [PATCH 24/31] rework x64 genSetReg --- src-self-hosted/backend.zig | 2 + src-self-hosted/backend/x86.zig | 32 +++ src-self-hosted/backend/x86_64.zig | 49 ++++ src-self-hosted/codegen.zig | 443 ++++++++++++----------------- 4 files changed, 258 insertions(+), 268 deletions(-) create mode 100644 src-self-hosted/backend.zig create mode 100644 src-self-hosted/backend/x86.zig create mode 100644 src-self-hosted/backend/x86_64.zig diff --git a/src-self-hosted/backend.zig b/src-self-hosted/backend.zig new file mode 100644 index 0000000000..9222a6e3c3 --- /dev/null +++ b/src-self-hosted/backend.zig @@ -0,0 +1,2 @@ +pub const x86_64 = @import("backend/x86_64.zig"); +pub const x86 = @import("backend/x86.zig"); diff --git a/src-self-hosted/backend/x86.zig b/src-self-hosted/backend/x86.zig new file mode 100644 index 0000000000..df1bb459ea --- /dev/null +++ b/src-self-hosted/backend/x86.zig @@ -0,0 +1,32 @@ +// zig fmt: off +pub const Register = enum(u8) { + // 0 through 7, 32-bit registers. id is int value + eax, ecx, edx, ebx, esp, ebp, esi, edi, + + // 8-15, 16-bit registers. id is int value - 8. + ax, cx, dx, bx, sp, bp, si, di, + + // 16-23, 8-bit registers. id is int value - 16. 
+ al, bl, cl, dl, ah, ch, dh, bh, + + pub fn size(self: @This()) u7 { + return switch (@enumToInt(self)) { + 0...7 => 32, + 8...15 => 16, + 16...23 => 8, + else => unreachable, + }; + } + + pub fn id(self: @This()) u3 { + return @intCast(u4, switch (@enumToInt(self)) { + 0...7 => |i| i, + 8...15 => |i| i - 8, + 16...23 => |i| i - 16, + else => unreachable, + }); + } + +}; + +// zig fmt: on diff --git a/src-self-hosted/backend/x86_64.zig b/src-self-hosted/backend/x86_64.zig new file mode 100644 index 0000000000..50b5d31273 --- /dev/null +++ b/src-self-hosted/backend/x86_64.zig @@ -0,0 +1,49 @@ +// zig fmt: off +pub const Register = enum(u8) { + // 0 through 15, 64-bit registers. 8-15 are extended. + // id is just the int value. + rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, + r8, r9, r10, r11, r12, r13, r14, r15, + + // 16 through 31, 32-bit registers. 24-31 are extended. + // id is int value - 16. + eax, ecx, edx, ebx, esp, ebp, esi, edi, + r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d, + + // 32-47, 16-bit registers. 40-47 are extended. + // id is int value - 32. + ax, cx, dx, bx, sp, bp, si, di, + r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w, + + // 48-63, 8-bit registers. 56-63 are extended. + // id is int value - 48. + al, bl, cl, dl, ah, ch, dh, bh, + r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b, + + pub fn size(self: @This()) u7 { + return switch (@enumToInt(self)) { + 0...15 => 64, + 16...31 => 32, + 32...47 => 16, + 48...64 => 8, + else => unreachable, + }; + } + + pub fn isExtended(self: @This()) bool { + return @enumToInt(self) & 0x08 != 0; + } + + pub fn id(self: @This()) u4 { + return @intCast(u4, switch (@enumToInt(self)) { + 0...15 => |i| i, + 16...31 => |i| i - 16, + 32...47 => |i| i - 32, + 48...64 => |i| i - 48, + else => unreachable, + }); + } + +}; + +// zig fmt: on diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index ae1489136e..881b3b2ac5 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -11,6 +11,8 @@ const ErrorMsg = Module.ErrorMsg; const Target = std.Target; const Allocator = mem.Allocator; +const Backend = @import("backend.zig"); + pub const Result = union(enum) { /// The `code` parameter passed to `generateSymbol` has the value appended. appended: void, @@ -348,172 +350,182 @@ const Function = struct { } } - fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) !void { + fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { switch (arch) { - .x86_64 => switch (reg) { - .rax => switch (mcv) { - .none, .unreach => unreachable, - .immediate => |x| { - // Setting the eax register zeroes the upper part of rax, so if the number is small - // enough, that is preferable. 
- // Best case: zero - // 31 c0 xor eax,eax - if (x == 0) { - return self.code.appendSlice(&[_]u8{ 0x31, 0xc0 }); - } - // Next best case: set eax with 4 bytes - // b8 04 03 02 01 mov eax,0x01020304 - if (x <= std.math.maxInt(u32)) { - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xb8; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: set rax with 8 bytes - // 48 b8 08 07 06 05 04 03 02 01 movabs rax,0x0102030405060708 - try self.code.resize(self.code.items.len + 10); - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xb8; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - return; - }, - .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rax = embedded_in_code", .{}), - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rax = register", .{}), - .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rax = memory", .{}), - }, - .rdx => switch (mcv) { - .none, .unreach => unreachable, - .immediate => |x| { - // Setting the edx register zeroes the upper part of rdx, so if the number is small - // enough, that is preferable. - // Best case: zero - // 31 d2 xor edx,edx - if (x == 0) { - return self.code.appendSlice(&[_]u8{ 0x31, 0xd2 }); - } - // Next best case: set edx with 4 bytes - // ba 04 03 02 01 mov edx,0x1020304 - if (x <= std.math.maxInt(u32)) { - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xba; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: set rdx with 8 bytes - // 48 ba 08 07 06 05 04 03 02 01 movabs rdx,0x0102030405060708 - try self.code.resize(self.code.items.len + 10); - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xba; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - return; - }, - .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}), - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}), - .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = memory", .{}), - }, - .rdi => switch (mcv) { - .none, .unreach => unreachable, - .immediate => |x| { - // Setting the edi register zeroes the upper part of rdi, so if the number is small - // enough, that is preferable. 
- // Best case: zero - // 31 ff xor edi,edi - if (x == 0) { - return self.code.appendSlice(&[_]u8{ 0x31, 0xff }); - } - // Next best case: set edi with 4 bytes - // bf 04 03 02 01 mov edi,0x1020304 - if (x <= std.math.maxInt(u32)) { - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xbf; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: set rdi with 8 bytes - // 48 bf 08 07 06 05 04 03 02 01 movabs rax,0x0102030405060708 - try self.code.resize(self.code.items.len + 10); - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xbf; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - return; - }, - .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = embedded_in_code", .{}), - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = register", .{}), - .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = memory", .{}), - }, - .rsi => switch (mcv) { - .none, .unreach => unreachable, - .immediate => |x| { - // Setting the edi register zeroes the upper part of rdi, so if the number is small - // enough, that is preferable. - // Best case: zero - // 31 f6 xor esi,esi - if (x == 0) { - return self.code.appendSlice(&[_]u8{ 0x31, 0xf6 }); - } - // Next best case: set esi with 4 bytes - // be 40 30 20 10 mov esi,0x10203040 - if (x <= std.math.maxInt(u32)) { - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xbe; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: set rsi with 8 bytes - // 48 be 80 70 60 50 40 30 20 10 movabs rsi,0x1020304050607080 - - try self.code.resize(self.code.items.len + 10); - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xbe; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - return; - }, - .embedded_in_code => |code_offset| { - // Examples: - // lea rsi, [rip + 0x01020304] - // lea rsi, [rip - 7] - // f: 48 8d 35 04 03 02 01 lea rsi,[rip+0x1020304] # 102031a <_start+0x102031a> - // 16: 48 8d 35 f9 ff ff ff lea rsi,[rip+0xfffffffffffffff9] # 16 <_start+0x16> + .x86_64 => switch (mcv) { + .none, .unreach => unreachable, + .immediate => |x| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit + // register is the fastest way to zero a register. + if (x == 0) { + // The encoding for `xor r32, r32` is `0x31 /r`. + // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the + // ModR/M byte of the instruction contains a register operand and an r/m operand." // - // We need the offset from RIP in a signed i32 twos complement. - // The instruction is 7 bytes long and RIP points to the next instruction. 
- try self.code.resize(self.code.items.len + 7); - const rip = self.code.items.len; - const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); - const offset = @intCast(i32, big_offset); - self.code.items[self.code.items.len - 7] = 0x48; - self.code.items[self.code.items.len - 6] = 0x8d; - self.code.items[self.code.items.len - 5] = 0x35; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(i32, imm_ptr, offset); - return; - }, - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}), - .memory => |x| { - if (x <= std.math.maxInt(u32)) { - // 48 8b 34 25 40 30 20 10 mov rsi,QWORD PTR ds:0x10203040 - try self.code.resize(self.code.items.len + 8); - self.code.items[self.code.items.len - 8] = 0x48; - self.code.items[self.code.items.len - 7] = 0x8b; - self.code.items[self.code.items.len - 6] = 0x34; - self.code.items[self.code.items.len - 5] = 0x25; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; + // R/M bytes are composed of two bits for the mode, then three bits for the register, + // then three bits for the operand. Since we're zeroing a register, the two three-bit + // values will be identical, and the mode is three (the raw register value). + // + if (reg.isExtended()) { + // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since + // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB. + // Both R and B are set, as we're extending, in effect, the register bits *and* the operand. + // + // From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB. In this case, that's + // b01000101, or 0x45. + return self.code.appendSlice(&[_]u8{ + 0x45, + 0x31, + 0xC0 | (@intCast(u8, @truncate(u3, reg.id())) << 3) | @truncate(u3, reg.id()), + }); } else { - return self.fail(src, "TODO implement genSetReg for x86_64 setting rsi to 64-bit memory", .{}); + return self.code.appendSlice(&[_]u8{ + 0x31, + 0xC0 | (@intCast(u8, reg.id()) << 3) | @intCast(u3, reg.id()), + }); } - }, + } + if (x <= std.math.maxInt(u32)) { + // Next best case: if we set the lower four bytes, the upper four will be zeroed. + // + // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM. + if (reg.isExtended()) { + // Just as with XORing, we need a REX prefix. This time though, we only + // need the B bit set, as we're extending the opcode's register field, + // and there is no Mod R/M byte. + // + // Thus, we need b01000001, or 0x41. + try self.code.resize(self.code.items.len + 6); + self.code.items[self.code.items.len - 6] = 0x41; + } else { + try self.code.resize(self.code.items.len + 5); + } + self.code.items[self.code.items.len - 5] = 0xB8 | @intCast(u8, @truncate(u3, reg.id())); + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } + // Worst case: we need to load the 64-bit register with the IMM. GNU's assemblers calls + // this `movabs`, though this is officially just a different variant of the plain `mov` + // instruction. + // + // This encoding is, in fact, the *same* as the one used for 32-bit loads. The only + // difference is that we set REX.W before the instruction, which extends the load to + // 64-bit and uses the full bit-width of the register. + // + // Since we always need a REX here, let's just check if we also need to set REX.B. 
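+                    // (For example, a movabs into r9 needs W for the 64-bit
+                    // operand size and B because r9 is extended, so the REX
+                    // byte is 0x48 | 0x01 = 0x49 and the opcode byte is
+                    // 0xB8 | 1 = 0xB9.)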
+                    //
+                    // In this case, the encoding of the REX byte is 0b0100100B
+                    const REX = 0x48 | (if (reg.isExtended()) @as(u8, 0x01) else 0);
+                    try self.code.resize(self.code.items.len + 10);
+                    self.code.items[self.code.items.len - 10] = REX;
+                    self.code.items[self.code.items.len - 9] = 0xB8 | @intCast(u8, @truncate(u3, reg.id()));
+                    const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
+                    mem.writeIntLittle(u64, imm_ptr, x);
+                },
+                .embedded_in_code => |code_offset| {
+                    if (reg.size() != 64) {
+                        return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
+                    }
+                    // We need the offset from RIP in a signed i32 two's complement.
+                    // The instruction is 7 bytes long and RIP points to the next instruction.
+                    //
+                    // 64-bit LEA is encoded as REX.W 8D /r. If the register is extended, the REX byte is modified,
+                    // but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three
+                    // bits as five.
+                    // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id.
+                    try self.code.resize(self.code.items.len + 7);
+                    const REX = 0x48 | if (reg.isExtended()) @as(u8, 1) else 0;
+                    const rip = self.code.items.len;
+                    const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
+                    const offset = @intCast(i32, big_offset);
+                    self.code.items[self.code.items.len - 7] = REX;
+                    self.code.items[self.code.items.len - 6] = 0x8D;
+                    self.code.items[self.code.items.len - 5] = 0x5 | (@intCast(u8, @truncate(u3, reg.id())) << 3);
+                    const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
+                    mem.writeIntLittle(i32, imm_ptr, offset);
+                },
+                .register => |r| {
+                    if (reg.size() != 64) {
+                        return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
+                    }
+                    const src_reg = @intToEnum(Reg(arch), @intCast(u8, r));
+                    // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX.
+                    // This is thus three bytes: REX 0x8B R/M.
+                    // If the destination is extended, the R field must be 1.
+                    // If the *source* is extended, the B field must be 1.
+                    // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle
+                    // three bits) contains the destination, and the R/M field (the lower three bits) contains the source.
+                    const REX = 0x48 | (if (reg.isExtended()) @as(u8, 4) else 0) | (if (src_reg.isExtended()) @as(u8, 1) else 0);
+                    const R = 0xC0 | (@intCast(u8, @truncate(u3, reg.id())) << 3) | @truncate(u3, src_reg.id());
+                    try self.code.appendSlice(&[_]u8{ REX, 0x8B, R });
+                },
+                .memory => |x| {
+                    if (reg.size() != 64) {
+                        return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
+                    }
+                    if (x <= std.math.maxInt(u32)) {
+                        // Moving from memory to a register is a variant of `8B /r`.
+                        // Since we're using 64-bit moves, we require a REX.
+                        // This variant also requires a SIB, as it would otherwise be RIP-relative.
+                        // We want mode zero with the lower three bits set to four to indicate an SIB with no other displacement.
+                        // The SIB must be 0x25, to indicate a disp32 with no scaled index.
+                        // 0b00RRR100, where RRR is the lower three bits of the register ID.
+                        // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
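+                        // (For example, loading rbx from absolute address 0x1000
+                        // encodes as 48 8B 1C 25 00 10 00 00: ModR/M is
+                        // 0b00_011_100 = 0x1C, then the SIB byte 0x25, then the disp32.)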
+ try self.code.resize(self.code.items.len + 8); + const REX = 0x48 | if (reg.isExtended()) @as(u8, 1) else 0; + const r = 0x04 | (@intCast(u8, @truncate(u3, reg.id())) << 3); + self.code.items[self.code.items.len - 8] = REX; + self.code.items[self.code.items.len - 7] = 0x8B; + self.code.items[self.code.items.len - 6] = r; + self.code.items[self.code.items.len - 5] = 0x25; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + } else { + // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load + // the value. + if (reg.id() == 0) { + // REX.W 0xA1 moffs64* + // moffs64* is a 64-bit offset "relative to segment base", which really just means the + // absolute address for all practical purposes. + try self.code.resize(self.code.items.len + 10); + // REX.W == 0x48 + self.code.items[self.code.items.len - 10] = 0x48; + self.code.items[self.code.items.len - 9] = 0xA1; + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + } else { + // This requires two instructions; a move imm as used above, followed by an indirect load using the register + // as the address and the register as the destination. + // + // This cannot be used if the lower three bits of the id are equal to four or five, as there + // is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with + // this instruction. + const id3 = @truncate(u3, reg.id()); + std.debug.assert(id3 != 4 and id3 != 5); + + // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. + try self.genSetReg(src, arch, reg, MCValue{ .immediate = x }); + + // Now, the register contains the address of the value to load into it + // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant. + // TODO: determine whether to allow other sized registers, and if so, handle them properly. + // This operation requires three bytes: REX 0x8B R/M + // + // For this operation, we want R/M mode *zero* (use register indirectly), and the two register + // values must match. Thus, it's 00ABCABC where ABC is the lower three bits of the register ID. + // + // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both* + // register operands need to be marked as extended. 
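[Editor's sketch, not part of the patch: the encodability restriction described in the comment above, pinned down as a predicate. The `canUseIndirect` helper is illustrative only; the patch expresses the same constraint as an assert on the low three id bits.]

```
const std = @import("std");

// In mode 0, r/m = 0b100 means "a SIB byte follows" and r/m = 0b101 means
// "RIP-relative disp32", so registers whose low three id bits are 4 or 5
// (RSP, RBP, R12, R13) cannot be encoded as a plain indirect operand.
fn canUseIndirect(reg_id: u4) bool {
    const low3 = @truncate(u3, reg_id);
    return low3 != 4 and low3 != 5;
}

test "rsp, rbp, r12, r13 are excluded" {
    std.testing.expect(canUseIndirect(0)); // rax
    std.testing.expect(!canUseIndirect(4)); // rsp
    std.testing.expect(!canUseIndirect(5)); // rbp
    std.testing.expect(!canUseIndirect(12)); // r12
    std.testing.expect(!canUseIndirect(13)); // r13
}
```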
+ const REX = 0x48 | if (reg.isExtended()) @as(u8, 0b0101) else 0; + const RM = (@intCast(u8, @truncate(u3, reg.id())) << 3) | @truncate(u3, reg.id()); + try self.code.appendSlice(&[_]u8{ REX, 0x8B, RM }); + } + } }, - else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}), }, else => return self.fail(src, "TODO implement genSetReg for more architectures", .{}), } @@ -579,113 +591,8 @@ const Function = struct { fn Reg(comptime arch: Target.Cpu.Arch) type { return switch (arch) { - .i386 => enum { - eax, - ebx, - ecx, - edx, - ebp, - esp, - esi, - edi, - - ax, - bx, - cx, - dx, - bp, - sp, - si, - di, - - ah, - bh, - ch, - dh, - - al, - bl, - cl, - dl, - }, - .x86_64 => enum { - rax, - rbx, - rcx, - rdx, - rbp, - rsp, - rsi, - rdi, - r8, - r9, - r10, - r11, - r12, - r13, - r14, - r15, - - eax, - ebx, - ecx, - edx, - ebp, - esp, - esi, - edi, - r8d, - r9d, - r10d, - r11d, - r12d, - r13d, - r14d, - r15d, - - ax, - bx, - cx, - dx, - bp, - sp, - si, - di, - r8w, - r9w, - r10w, - r11w, - r12w, - r13w, - r14w, - r15w, - - ah, - bh, - ch, - dh, - bph, - sph, - sih, - dih, - - al, - bl, - cl, - dl, - bpl, - spl, - sil, - dil, - r8b, - r9b, - r10b, - r11b, - r12b, - r13b, - r14b, - r15b, - }, + .i386 => Backend.x86.Register, + .x86_64 => Backend.x86_64.Register, else => @compileError("TODO add more register enums"), }; } From 638554544a27018fea2243bc69b6ca4ebef8d1e5 Mon Sep 17 00:00:00 2001 From: Noam Preil Date: Sun, 17 May 2020 04:06:17 -0400 Subject: [PATCH 25/31] Fix a dumb (thanks daurminator!) --- src-self-hosted/backend/x86_64.zig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src-self-hosted/backend/x86_64.zig b/src-self-hosted/backend/x86_64.zig index 50b5d31273..2d63dd9aa5 100644 --- a/src-self-hosted/backend/x86_64.zig +++ b/src-self-hosted/backend/x86_64.zig @@ -35,13 +35,7 @@ pub const Register = enum(u8) { } pub fn id(self: @This()) u4 { - return @intCast(u4, switch (@enumToInt(self)) { - 0...15 => |i| i, - 16...31 => |i| i - 16, - 32...47 => |i| i - 32, - 48...64 => |i| i - 48, - else => unreachable, - }); + return @truncate(u4, @enumToInt(self)); } }; From 497eb3182041a199d41c4c4fb7f44fa31d8c41e3 Mon Sep 17 00:00:00 2001 From: Noam Preil Date: Sun, 17 May 2020 04:34:18 -0400 Subject: [PATCH 26/31] Fix the dumb in x86 too --- src-self-hosted/backend/x86.zig | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src-self-hosted/backend/x86.zig b/src-self-hosted/backend/x86.zig index df1bb459ea..0de736f350 100644 --- a/src-self-hosted/backend/x86.zig +++ b/src-self-hosted/backend/x86.zig @@ -19,12 +19,7 @@ pub const Register = enum(u8) { } pub fn id(self: @This()) u3 { - return @intCast(u4, switch (@enumToInt(self)) { - 0...7 => |i| i, - 8...15 => |i| i - 8, - 16...23 => |i| i - 16, - else => unreachable, - }); + return @truncate(u3, @enumToInt(self)); } }; From e2196a458f7c0feac24036c694282ce31be9edce Mon Sep 17 00:00:00 2001 From: Noam Preil Date: Sun, 17 May 2020 04:47:25 -0400 Subject: [PATCH 27/31] Minor cleanup --- src-self-hosted/codegen.zig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 881b3b2ac5..fab3876d30 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -379,12 +379,12 @@ const Function = struct { return self.code.appendSlice(&[_]u8{ 0x45, 0x31, - 0xC0 | (@intCast(u8, @truncate(u3, reg.id())) << 3) | @truncate(u3, reg.id()), + 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | 
@truncate(u3, reg.id()), }); } else { return self.code.appendSlice(&[_]u8{ 0x31, - 0xC0 | (@intCast(u8, reg.id()) << 3) | @intCast(u3, reg.id()), + 0xC0 | (@as(u8, reg.id()) << 3) | reg.id(), }); } } @@ -403,7 +403,7 @@ const Function = struct { } else { try self.code.resize(self.code.items.len + 5); } - self.code.items[self.code.items.len - 5] = 0xB8 | @intCast(u8, @truncate(u3, reg.id())); + self.code.items[self.code.items.len - 5] = 0xB8 | @as(u8, reg.id() & 0b111); const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); return; @@ -422,7 +422,7 @@ const Function = struct { const REX = 0x48 | (if (reg.isExtended()) @as(u8, 0x01) else 0); try self.code.resize(self.code.items.len + 10); self.code.items[self.code.items.len - 10] = REX; - self.code.items[self.code.items.len - 9] = 0xB8 | @intCast(u8, @truncate(u3, reg.id())); + self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111); const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; mem.writeIntLittle(u64, imm_ptr, x); }, @@ -444,7 +444,7 @@ const Function = struct { const offset = @intCast(i32, big_offset); self.code.items[self.code.items.len - 7] = REX; self.code.items[self.code.items.len - 6] = 0x8D; - self.code.items[self.code.items.len - 5] = 0x5 | (@intCast(u8, @truncate(u3, reg.id())) << 3); + self.code.items[self.code.items.len - 5] = 0b101 | (@as(u8, reg.id() & 0b111) << 3); const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; mem.writeIntLittle(i32, imm_ptr, offset); }, @@ -460,7 +460,7 @@ const Function = struct { // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle // three bits) contain the destination, and the R/M field (the lower three bits) contain the source. const REX = 0x48 | (if (reg.isExtended()) @as(u8, 4) else 0) | (if (src_reg.isExtended()) @as(u8, 1) else 0); - const R = 0xC0 | (@intCast(u8, @truncate(u3, reg.id())) << 3) | @truncate(u3, src_reg.id()); + const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, src_reg.id()); try self.code.appendSlice(&[_]u8{ REX, 0x8B, R }); }, .memory => |x| { @@ -477,7 +477,7 @@ const Function = struct { // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. try self.code.resize(self.code.items.len + 8); const REX = 0x48 | if (reg.isExtended()) @as(u8, 1) else 0; - const r = 0x04 | (@intCast(u8, @truncate(u3, reg.id())) << 3); + const r = 0x04 | (@as(u8, reg.id() & 0b111) << 3); self.code.items[self.code.items.len - 8] = REX; self.code.items[self.code.items.len - 7] = 0x8B; self.code.items[self.code.items.len - 6] = r; @@ -521,7 +521,7 @@ const Function = struct { // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both* // register operands need to be marked as extended. 
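[Editor's sketch, not part of the patch: the cleanup above replaces `@intCast(u8, @truncate(u3, reg.id()))` with `@as(u8, reg.id() & 0b111)`, which computes the same low three bits. The `modRM` helper below is hypothetical; the patch assembles these bytes inline.]

```
const std = @import("std");

// A Mod R/M byte is mode (2 bits), reg (3 bits), r/m (3 bits), high to low.
// Masking a 4-bit register id with 0b111 keeps exactly the encodable bits,
// equivalent to the @truncate(u3, ...) it replaces.
fn modRM(mode: u2, reg_bits: u3, rm_bits: u3) u8 {
    return (@as(u8, mode) << 6) | (@as(u8, reg_bits) << 3) | rm_bits;
}

test "xor reg, reg uses mode 3 with matching fields" {
    // 0x31 /r with Mod R/M 0xC0 encodes `xor eax, eax`.
    std.testing.expectEqual(@as(u8, 0xC0), modRM(0b11, 0b000, 0b000));
    // Destination and source rbx (id 3): 0xDB.
    std.testing.expectEqual(@as(u8, 0xDB), modRM(0b11, 0b011, 0b011));
}
```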
const REX = 0x48 | if (reg.isExtended()) @as(u8, 0b0101) else 0; - const RM = (@intCast(u8, @truncate(u3, reg.id())) << 3) | @truncate(u3, reg.id()); + const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()); try self.code.appendSlice(&[_]u8{ REX, 0x8B, RM }); } } From 773281c1f47f4e0379d6a3d314c00f752151d89f Mon Sep 17 00:00:00 2001 From: Noam Preil Date: Sun, 17 May 2020 05:17:39 -0400 Subject: [PATCH 28/31] Remove trailing whitespace --- src-self-hosted/backend/x86.zig | 1 - src-self-hosted/backend/x86_64.zig | 1 - 2 files changed, 2 deletions(-) diff --git a/src-self-hosted/backend/x86.zig b/src-self-hosted/backend/x86.zig index 0de736f350..f742e38a54 100644 --- a/src-self-hosted/backend/x86.zig +++ b/src-self-hosted/backend/x86.zig @@ -21,7 +21,6 @@ pub const Register = enum(u8) { pub fn id(self: @This()) u3 { return @truncate(u3, @enumToInt(self)); } - }; // zig fmt: on diff --git a/src-self-hosted/backend/x86_64.zig b/src-self-hosted/backend/x86_64.zig index 2d63dd9aa5..8c8819466c 100644 --- a/src-self-hosted/backend/x86_64.zig +++ b/src-self-hosted/backend/x86_64.zig @@ -37,7 +37,6 @@ pub const Register = enum(u8) { pub fn id(self: @This()) u4 { return @truncate(u4, @enumToInt(self)); } - }; // zig fmt: on From e198eec76a478fa9d1cb7b6d7488e273a0d15ff1 Mon Sep 17 00:00:00 2001 From: Noam Preil Date: Sun, 17 May 2020 05:35:07 -0400 Subject: [PATCH 29/31] Document register functions --- src-self-hosted/backend/x86.zig | 4 ++++ src-self-hosted/backend/x86_64.zig | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/src-self-hosted/backend/x86.zig b/src-self-hosted/backend/x86.zig index f742e38a54..60872dedb9 100644 --- a/src-self-hosted/backend/x86.zig +++ b/src-self-hosted/backend/x86.zig @@ -9,6 +9,7 @@ pub const Register = enum(u8) { // 16-23, 8-bit registers. id is int value - 16. al, bl, cl, dl, ah, ch, dh, bh, + /// Returns the bit-width of the register. pub fn size(self: @This()) u7 { return switch (@enumToInt(self)) { 0...7 => 32, @@ -18,6 +19,9 @@ pub const Register = enum(u8) { }; } + /// Returns the register's id. This is used in practically every opcode the + /// x86 has. It is embedded in some instructions, such as the `B8 +rd` move + /// instruction, and is used in the R/M byte. pub fn id(self: @This()) u3 { return @truncate(u3, @enumToInt(self)); } diff --git a/src-self-hosted/backend/x86_64.zig b/src-self-hosted/backend/x86_64.zig index 8c8819466c..0cc008ae1b 100644 --- a/src-self-hosted/backend/x86_64.zig +++ b/src-self-hosted/backend/x86_64.zig @@ -20,6 +20,7 @@ pub const Register = enum(u8) { al, bl, cl, dl, ah, ch, dh, bh, r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b, + /// Returns the bit-width of the register. pub fn size(self: @This()) u7 { return switch (@enumToInt(self)) { 0...15 => 64, @@ -30,10 +31,20 @@ pub const Register = enum(u8) { }; } + /// Returns whether the register is *extended*. Extended registers are the + /// new registers added with amd64, r8 through r15. This also includes any + /// other variant of access to those registers, such as r8b, r15d, and so + /// on. This is needed because access to these registers requires special + /// handling via the REX prefix, via the B or R bits, depending on context. pub fn isExtended(self: @This()) bool { return @enumToInt(self) & 0x08 != 0; } + /// This returns the 4-bit register ID, which is used in practically every + /// opcode. Note that bit 3 (the highest bit) is *never* used directly in + /// an instruction (@see isExtended), and requires special handling. 
The + /// lower three bits are often embedded directly in instructions (such as + /// the B8 variant of moves), or used in R/M bytes. pub fn id(self: @This()) u4 { return @truncate(u4, @enumToInt(self)); } From 88c8ff6e374334e538a686781d608c1b204aeb45 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 17 May 2020 12:08:47 -0400 Subject: [PATCH 30/31] move some files around --- src-self-hosted/backend.zig | 2 -- src-self-hosted/codegen.zig | 27 ++++++++++++++----- src-self-hosted/{backend => codegen}/x86.zig | 0 .../{backend => codegen}/x86_64.zig | 0 4 files changed, 21 insertions(+), 8 deletions(-) delete mode 100644 src-self-hosted/backend.zig rename src-self-hosted/{backend => codegen}/x86.zig (100%) rename src-self-hosted/{backend => codegen}/x86_64.zig (100%) diff --git a/src-self-hosted/backend.zig b/src-self-hosted/backend.zig deleted file mode 100644 index 9222a6e3c3..0000000000 --- a/src-self-hosted/backend.zig +++ /dev/null @@ -1,2 +0,0 @@ -pub const x86_64 = @import("backend/x86_64.zig"); -pub const x86 = @import("backend/x86.zig"); diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index fab3876d30..08a7b29ca3 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -11,8 +11,6 @@ const ErrorMsg = Module.ErrorMsg; const Target = std.Target; const Allocator = mem.Allocator; -const Backend = @import("backend.zig"); - pub const Result = union(enum) { /// The `code` parameter passed to `generateSymbol` has the value appended. appended: void, @@ -194,14 +192,28 @@ const Function = struct { }, else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.target.cpu.arch}), } - return .unreach; + return .none; } fn genCall(self: *Function, inst: *ir.Inst.Call) !MCValue { + if (inst.args.func.cast(ir.Inst.Constant)) |func_inst| { + if (inst.args.args.len != 0) { + return self.fail(inst.base.src, "TODO implement call with more than 0 parameters", .{}); + } + + if (func_inst.val.cast(Value.Payload.Function)) |func_val| { + const func = func_val.func; + return self.fail(inst.base.src, "TODO implement calling function", .{}); + } else { + return self.fail(inst.base.src, "TODO implement calling weird function values", .{}); + } + } else { + return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{}); + } + switch (self.target.cpu.arch) { else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}), } - return .unreach; } fn genRet(self: *Function, inst: *ir.Inst.Ret) !MCValue { @@ -589,10 +601,13 @@ const Function = struct { } }; +const x86_64 = @import("codegen/x86_64.zig"); +const x86 = @import("codegen/x86.zig"); + fn Reg(comptime arch: Target.Cpu.Arch) type { return switch (arch) { - .i386 => Backend.x86.Register, - .x86_64 => Backend.x86_64.Register, + .i386 => x86.Register, + .x86_64 => x86_64.Register, else => @compileError("TODO add more register enums"), }; } diff --git a/src-self-hosted/backend/x86.zig b/src-self-hosted/codegen/x86.zig similarity index 100% rename from src-self-hosted/backend/x86.zig rename to src-self-hosted/codegen/x86.zig diff --git a/src-self-hosted/backend/x86_64.zig b/src-self-hosted/codegen/x86_64.zig similarity index 100% rename from src-self-hosted/backend/x86_64.zig rename to src-self-hosted/codegen/x86_64.zig From b0968abccbfb4072528c3b5e039bc03b27af89a1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 17 May 2020 13:49:22 -0400 Subject: [PATCH 31/31] update ZIR compare output test to test incremental updates --- 
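[Editor's sketch, not part of the patch: patch 30 above has `Reg()` dispatch to per-architecture register types at comptime. This is the pattern in miniature, with toy enums standing in for codegen/x86.zig and codegen/x86_64.zig.]

```
const std = @import("std");
const Target = std.Target;

// Stand-ins for the real per-architecture register files.
const x86 = struct {
    pub const Register = enum(u8) { eax, ebx, ecx, edx };
};
const x86_64 = struct {
    pub const Register = enum(u8) { rax, rbx, rcx, rdx };
};

// Comptime dispatch: each architecture gets its own Register type, and
// unsupported architectures fail at compile time rather than at runtime.
fn Reg(comptime arch: Target.Cpu.Arch) type {
    return switch (arch) {
        .i386 => x86.Register,
        .x86_64 => x86_64.Register,
        else => @compileError("TODO add more register enums"),
    };
}

test "one Register type per architecture, resolved at comptime" {
    std.testing.expect(Reg(.x86_64) == x86_64.Register);
    std.testing.expect(Reg(.i386) == x86.Register);
}
```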
src-self-hosted/test.zig | 106 +++++++++++--------- test/stage2/zir.zig | 212 ++++++++++++++++++++++++++------------- 2 files changed, 202 insertions(+), 116 deletions(-) diff --git a/src-self-hosted/test.zig b/src-self-hosted/test.zig index 9bb0b8846b..451bba996a 100644 --- a/src-self-hosted/test.zig +++ b/src-self-hosted/test.zig @@ -21,8 +21,8 @@ pub const TestContext = struct { pub const ZIRCompareOutputCase = struct { name: []const u8, - src: [:0]const u8, - expected_stdout: []const u8, + src_list: []const []const u8, + expected_stdout_list: []const []const u8, }; pub const ZIRTransformCase = struct { @@ -35,13 +35,13 @@ pub const TestContext = struct { pub fn addZIRCompareOutput( ctx: *TestContext, name: []const u8, - src: [:0]const u8, - expected_stdout: []const u8, + src_list: []const []const u8, + expected_stdout_list: []const []const u8, ) void { ctx.zir_cmp_output_cases.append(.{ .name = name, - .src = src, - .expected_stdout = expected_stdout, + .src_list = src_list, + .expected_stdout_list = expected_stdout_list, }) catch unreachable; } @@ -104,56 +104,68 @@ pub const TestContext = struct { var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); - var prg_node = root_node.start(case.name, 2); - prg_node.activate(); - defer prg_node.end(); - const tmp_src_path = "test-case.zir"; - try tmp.dir.writeFile(tmp_src_path, case.src); - const root_pkg = try Package.create(allocator, tmp.dir, ".", tmp_src_path); defer root_pkg.destroy(); - { - var module = try Module.init(allocator, .{ - .target = target, - .output_mode = .Exe, - .optimize_mode = .Debug, - .bin_file_dir = tmp.dir, - .bin_file_path = "a.out", - .root_pkg = root_pkg, - }); - defer module.deinit(); + var prg_node = root_node.start(case.name, case.src_list.len); + prg_node.activate(); + defer prg_node.end(); - var module_node = prg_node.start("parse,analysis,codegen", null); - module_node.activate(); + var module = try Module.init(allocator, .{ + .target = target, + .output_mode = .Exe, + .optimize_mode = .Debug, + .bin_file_dir = tmp.dir, + .bin_file_path = "a.out", + .root_pkg = root_pkg, + }); + defer module.deinit(); + + for (case.src_list) |source, i| { + var src_node = prg_node.start("update", 2); + src_node.activate(); + defer src_node.end(); + + try tmp.dir.writeFile(tmp_src_path, source); + + var update_node = src_node.start("parse,analysis,codegen", null); + update_node.activate(); + try module.makeBinFileWritable(); try module.update(); - module_node.end(); - } + update_node.end(); - var exec_result = x: { - var exec_node = prg_node.start("execute", null); - exec_node.activate(); - defer exec_node.end(); + var exec_result = x: { + var exec_node = src_node.start("execute", null); + exec_node.activate(); + defer exec_node.end(); - break :x try std.ChildProcess.exec(.{ - .allocator = allocator, - .argv = &[_][]const u8{"./a.out"}, - .cwd_dir = tmp.dir, - }); - }; - defer allocator.free(exec_result.stdout); - defer allocator.free(exec_result.stderr); - switch (exec_result.term) { - .Exited => |code| { - if (code != 0) { - std.debug.warn("elf file exited with code {}\n", .{code}); - return error.BinaryBadExitCode; - } - }, - else => return error.BinaryCrashed, + try module.makeBinFileExecutable(); + break :x try std.ChildProcess.exec(.{ + .allocator = allocator, + .argv = &[_][]const u8{"./a.out"}, + .cwd_dir = tmp.dir, + }); + }; + defer allocator.free(exec_result.stdout); + defer allocator.free(exec_result.stderr); + switch (exec_result.term) { + .Exited => |code| { + if (code != 0) { + std.debug.warn("elf file 
exited with code {}\n", .{code}); + return error.BinaryBadExitCode; + } + }, + else => return error.BinaryCrashed, + } + const expected_stdout = case.expected_stdout_list[i]; + if (!std.mem.eql(u8, expected_stdout, exec_result.stdout)) { + std.debug.panic( + "update index {}, mismatched stdout\n====Expected (len={}):====\n{}\n====Actual (len={}):====\n{}\n========\n", + .{ i, expected_stdout.len, expected_stdout, exec_result.stdout.len, exec_result.stdout }, + ); + } } - std.testing.expectEqualSlices(u8, case.expected_stdout, exec_result.stdout); } fn runOneZIRTransformCase( diff --git a/test/stage2/zir.zig b/test/stage2/zir.zig index 868ded42ed..afee3e7895 100644 --- a/test/stage2/zir.zig +++ b/test/stage2/zir.zig @@ -65,74 +65,148 @@ pub fn addCases(ctx: *TestContext) void { return; } - ctx.addZIRCompareOutput("hello world ZIR", - \\@noreturn = primitive(noreturn) - \\@void = primitive(void) - \\@usize = primitive(usize) - \\@0 = int(0) - \\@1 = int(1) - \\@2 = int(2) - \\@3 = int(3) - \\ - \\@syscall_array = str("syscall") - \\@sysoutreg_array = str("={rax}") - \\@rax_array = str("{rax}") - \\@rdi_array = str("{rdi}") - \\@rcx_array = str("rcx") - \\@r11_array = str("r11") - \\@rdx_array = str("{rdx}") - \\@rsi_array = str("{rsi}") - \\@memory_array = str("memory") - \\@len_array = str("len") - \\ - \\@msg = str("Hello, world!\n") - \\ - \\@start_fnty = fntype([], @noreturn, cc=Naked) - \\@start = fn(@start_fnty, { - \\ %SYS_exit_group = int(231) - \\ %exit_code = as(@usize, @0) - \\ - \\ %syscall = ref(@syscall_array) - \\ %sysoutreg = ref(@sysoutreg_array) - \\ %rax = ref(@rax_array) - \\ %rdi = ref(@rdi_array) - \\ %rcx = ref(@rcx_array) - \\ %rdx = ref(@rdx_array) - \\ %rsi = ref(@rsi_array) - \\ %r11 = ref(@r11_array) - \\ %memory = ref(@memory_array) - \\ - \\ %SYS_write = as(@usize, @1) - \\ %STDOUT_FILENO = as(@usize, @1) - \\ - \\ %msg_ptr = ref(@msg) - \\ %msg_addr = ptrtoint(%msg_ptr) - \\ - \\ %len_name = ref(@len_array) - \\ %msg_len_ptr = fieldptr(%msg_ptr, %len_name) - \\ %msg_len = deref(%msg_len_ptr) - \\ %rc_write = asm(%syscall, @usize, - \\ volatile=1, - \\ output=%sysoutreg, - \\ inputs=[%rax, %rdi, %rsi, %rdx], - \\ clobbers=[%rcx, %r11, %memory], - \\ args=[%SYS_write, %STDOUT_FILENO, %msg_addr, %msg_len]) - \\ - \\ %rc_exit = asm(%syscall, @usize, - \\ volatile=1, - \\ output=%sysoutreg, - \\ inputs=[%rax, %rdi], - \\ clobbers=[%rcx, %r11, %memory], - \\ args=[%SYS_exit_group, %exit_code]) - \\ - \\ %99 = unreachable() - \\}); - \\ - \\@9 = str("_start") - \\@10 = ref(@9) - \\@11 = export(@10, @start) - , - \\Hello, world! 
- \\ + ctx.addZIRCompareOutput( + "hello world ZIR, update msg", + &[_][]const u8{ + \\@noreturn = primitive(noreturn) + \\@void = primitive(void) + \\@usize = primitive(usize) + \\@0 = int(0) + \\@1 = int(1) + \\@2 = int(2) + \\@3 = int(3) + \\ + \\@syscall_array = str("syscall") + \\@sysoutreg_array = str("={rax}") + \\@rax_array = str("{rax}") + \\@rdi_array = str("{rdi}") + \\@rcx_array = str("rcx") + \\@r11_array = str("r11") + \\@rdx_array = str("{rdx}") + \\@rsi_array = str("{rsi}") + \\@memory_array = str("memory") + \\@len_array = str("len") + \\ + \\@msg = str("Hello, world!\n") + \\ + \\@start_fnty = fntype([], @noreturn, cc=Naked) + \\@start = fn(@start_fnty, { + \\ %SYS_exit_group = int(231) + \\ %exit_code = as(@usize, @0) + \\ + \\ %syscall = ref(@syscall_array) + \\ %sysoutreg = ref(@sysoutreg_array) + \\ %rax = ref(@rax_array) + \\ %rdi = ref(@rdi_array) + \\ %rcx = ref(@rcx_array) + \\ %rdx = ref(@rdx_array) + \\ %rsi = ref(@rsi_array) + \\ %r11 = ref(@r11_array) + \\ %memory = ref(@memory_array) + \\ + \\ %SYS_write = as(@usize, @1) + \\ %STDOUT_FILENO = as(@usize, @1) + \\ + \\ %msg_ptr = ref(@msg) + \\ %msg_addr = ptrtoint(%msg_ptr) + \\ + \\ %len_name = ref(@len_array) + \\ %msg_len_ptr = fieldptr(%msg_ptr, %len_name) + \\ %msg_len = deref(%msg_len_ptr) + \\ %rc_write = asm(%syscall, @usize, + \\ volatile=1, + \\ output=%sysoutreg, + \\ inputs=[%rax, %rdi, %rsi, %rdx], + \\ clobbers=[%rcx, %r11, %memory], + \\ args=[%SYS_write, %STDOUT_FILENO, %msg_addr, %msg_len]) + \\ + \\ %rc_exit = asm(%syscall, @usize, + \\ volatile=1, + \\ output=%sysoutreg, + \\ inputs=[%rax, %rdi], + \\ clobbers=[%rcx, %r11, %memory], + \\ args=[%SYS_exit_group, %exit_code]) + \\ + \\ %99 = unreachable() + \\}); + \\ + \\@9 = str("_start") + \\@10 = ref(@9) + \\@11 = export(@10, @start) + , + \\@noreturn = primitive(noreturn) + \\@void = primitive(void) + \\@usize = primitive(usize) + \\@0 = int(0) + \\@1 = int(1) + \\@2 = int(2) + \\@3 = int(3) + \\ + \\@syscall_array = str("syscall") + \\@sysoutreg_array = str("={rax}") + \\@rax_array = str("{rax}") + \\@rdi_array = str("{rdi}") + \\@rcx_array = str("rcx") + \\@r11_array = str("r11") + \\@rdx_array = str("{rdx}") + \\@rsi_array = str("{rsi}") + \\@memory_array = str("memory") + \\@len_array = str("len") + \\ + \\@msg = str("Hello, world!\n") + \\@msg2 = str("HELL WORLD\n") + \\ + \\@start_fnty = fntype([], @noreturn, cc=Naked) + \\@start = fn(@start_fnty, { + \\ %SYS_exit_group = int(231) + \\ %exit_code = as(@usize, @0) + \\ + \\ %syscall = ref(@syscall_array) + \\ %sysoutreg = ref(@sysoutreg_array) + \\ %rax = ref(@rax_array) + \\ %rdi = ref(@rdi_array) + \\ %rcx = ref(@rcx_array) + \\ %rdx = ref(@rdx_array) + \\ %rsi = ref(@rsi_array) + \\ %r11 = ref(@r11_array) + \\ %memory = ref(@memory_array) + \\ + \\ %SYS_write = as(@usize, @1) + \\ %STDOUT_FILENO = as(@usize, @1) + \\ + \\ %msg_ptr = ref(@msg2) + \\ %msg_addr = ptrtoint(%msg_ptr) + \\ + \\ %len_name = ref(@len_array) + \\ %msg_len_ptr = fieldptr(%msg_ptr, %len_name) + \\ %msg_len = deref(%msg_len_ptr) + \\ %rc_write = asm(%syscall, @usize, + \\ volatile=1, + \\ output=%sysoutreg, + \\ inputs=[%rax, %rdi, %rsi, %rdx], + \\ clobbers=[%rcx, %r11, %memory], + \\ args=[%SYS_write, %STDOUT_FILENO, %msg_addr, %msg_len]) + \\ + \\ %rc_exit = asm(%syscall, @usize, + \\ volatile=1, + \\ output=%sysoutreg, + \\ inputs=[%rax, %rdi], + \\ clobbers=[%rcx, %r11, %memory], + \\ args=[%SYS_exit_group, %exit_code]) + \\ + \\ %99 = unreachable() + \\}); + \\ + \\@9 = str("_start") + \\@10 = ref(@9) + 
\\@11 = export(@10, @start) + }, + &[_][]const u8{ + \\Hello, world! + \\ + , + \\HELL WORLD + \\ + }, ); }
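[Editor's sketch, not part of the patch: a compact view of what the reworked API enables. Each element of `src_list` is compiled in sequence against the same long-running Module, with one expected stdout per source. The sources here are placeholders, not valid ZIR, and the import path is an assumption about how this test file reaches TestContext.]

```
// Assumed import, mirroring how test/stage2/zir.zig reaches the harness.
const TestContext = @import("../../src-self-hosted/test.zig").TestContext;

fn addUpdateSketch(ctx: *TestContext) void {
    // Placeholder sources: in a real case these would be two full ZIR
    // programs differing in one declaration, like @msg vs @msg2 above.
    const src_v1: []const u8 = "...full ZIR program, first version...";
    const src_v2: []const u8 = "...same program with @msg2 swapped in...";
    ctx.addZIRCompareOutput(
        "two-step update sketch",
        &[_][]const u8{ src_v1, src_v2 },
        &[_][]const u8{ "Hello, world!\n", "HELL WORLD\n" },
    );
}
```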