diff --git a/README.md b/README.md index 3bb9302cd2..916f2e16f3 100644 --- a/README.md +++ b/README.md @@ -71,40 +71,3 @@ can do about it. See that issue for a workaround you can do in the meantime. ##### Windows See https://github.com/ziglang/zig/wiki/Building-Zig-on-Windows - -### Stage 2: Build Self-Hosted Zig from Zig Source Code - -*Note: Stage 2 compiler is not complete. Beta users of Zig should use the -Stage 1 compiler for now.* - -Dependencies are the same as Stage 1, except now you can use stage 1 to compile -Zig code. - -``` -bin/zig build --prefix $(pwd)/stage2 -``` - -This produces `./stage2/bin/zig` which can be used for testing and development. -Once it is feature complete, it will be used to build stage 3 - the final compiler -binary. - -### Stage 3: Rebuild Self-Hosted Zig Using the Self-Hosted Compiler - -*Note: Stage 2 compiler is not yet able to build Stage 3. Building Stage 3 is -not yet supported.* - -Once the self-hosted compiler can build itself, this will be the actual -compiler binary that we will install to the system. Until then, users should -use stage 1. - -#### Debug / Development Build - -``` -./stage2/bin/zig build --prefix $(pwd)/stage3 -``` - -#### Release / Install Build - -``` -./stage2/bin/zig build install -Drelease -``` diff --git a/build.zig b/build.zig index ab1d985b74..3636da4f28 100644 --- a/build.zig +++ b/build.zig @@ -51,6 +51,8 @@ pub fn build(b: *Builder) !void { var exe = b.addExecutable("zig", "src-self-hosted/main.zig"); exe.setBuildMode(mode); + test_step.dependOn(&exe.step); + b.default_step.dependOn(&exe.step); const skip_release = b.option(bool, "skip-release", "Main test suite skips release builds") orelse false; const skip_release_small = b.option(bool, "skip-release-small", "Main test suite skips release-small builds") orelse skip_release; @@ -58,21 +60,20 @@ pub fn build(b: *Builder) !void { const skip_release_safe = b.option(bool, "skip-release-safe", "Main test suite skips release-safe builds") orelse skip_release; const skip_non_native = b.option(bool, "skip-non-native", "Main test suite skips non-native builds") orelse false; const skip_libc = b.option(bool, "skip-libc", "Main test suite skips tests that link libc") orelse false; - const skip_self_hosted = (b.option(bool, "skip-self-hosted", "Main test suite skips building self hosted compiler") orelse false) or true; // TODO evented I/O good enough that this passes everywhere - if (!skip_self_hosted) { - test_step.dependOn(&exe.step); - } const only_install_lib_files = b.option(bool, "lib-files-only", "Only install library files") orelse false; - if (!only_install_lib_files and !skip_self_hosted) { + const enable_llvm = b.option(bool, "enable-llvm", "Build self-hosted compiler with LLVM backend enabled") orelse false; + if (enable_llvm) { var ctx = parseConfigH(b, config_h_text); ctx.llvm = try findLLVM(b, ctx.llvm_config_exe); try configureStage2(b, exe, ctx); - - b.default_step.dependOn(&exe.step); + } + if (!only_install_lib_files) { exe.install(); } + const link_libc = b.option(bool, "force-link-libc", "Force self-hosted compiler to link libc") orelse false; + if (link_libc) exe.linkLibC(); b.installDirectory(InstallDirectoryOptions{ .source_dir = "lib", diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig index 97fca103bb..b9dea8a12f 100644 --- a/lib/std/array_list.zig +++ b/lib/std/array_list.zig @@ -8,13 +8,13 @@ const Allocator = mem.Allocator; /// A contiguous, growable list of items in memory. /// This is a wrapper around an array of T values. 
Initialize with `init`.
 pub fn ArrayList(comptime T: type) type {
-    return AlignedArrayList(T, null);
+    return ArrayListAligned(T, null);
 }
 
-pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type {
+pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type {
     if (alignment) |a| {
         if (a == @alignOf(T)) {
-            return AlignedArrayList(T, null);
+            return ArrayListAligned(T, null);
         }
     }
     return struct {
@@ -76,6 +76,10 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type {
             };
         }
 
+        pub fn toUnmanaged(self: Self) ArrayListAlignedUnmanaged(T, alignment) {
+            return .{ .items = self.items, .capacity = self.capacity };
+        }
+
         /// The caller owns the returned memory. ArrayList becomes empty.
         pub fn toOwnedSlice(self: *Self) Slice {
             const allocator = self.allocator;
@@ -84,8 +88,8 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type {
             return result;
         }
 
-        /// Insert `item` at index `n`. Moves `list[n .. list.len]`
-        /// to make room.
+        /// Insert `item` at index `n` by moving `list[n .. list.len]` to make room.
+        /// This operation is O(N).
         pub fn insert(self: *Self, n: usize, item: T) !void {
             try self.ensureCapacity(self.items.len + 1);
             self.items.len += 1;
@@ -94,8 +98,7 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type {
             self.items[n] = item;
         }
 
-        /// Insert slice `items` at index `i`. Moves
-        /// `list[i .. list.len]` to make room.
+        /// Insert slice `items` at index `i` by moving `list[i .. list.len]` to make room.
         /// This operation is O(N).
         pub fn insertSlice(self: *Self, i: usize, items: SliceConst) !void {
             try self.ensureCapacity(self.items.len + items.len);
@@ -146,10 +149,15 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type {
         /// Append the slice of items to the list. Allocates more
         /// memory as necessary.
         pub fn appendSlice(self: *Self, items: SliceConst) !void {
+            try self.ensureCapacity(self.items.len + items.len);
+            self.appendSliceAssumeCapacity(items);
+        }
+
+        /// Append the slice of items to the list, asserting the capacity is already
+        /// enough to store the new items.
+        pub fn appendSliceAssumeCapacity(self: *Self, items: SliceConst) void {
             const oldlen = self.items.len;
             const newlen = self.items.len + items.len;
-
-            try self.ensureCapacity(newlen);
             self.items.len = newlen;
             mem.copy(T, self.items[oldlen..], items);
         }
@@ -259,6 +267,231 @@ pub fn AlignedArrayList(comptime T: type, comptime alignment: ?u29) type {
     };
 }
 
+/// Bring-your-own allocator with every function call.
+/// Initialize directly and deinitialize with `deinit` or use `toOwnedSlice`.
+pub fn ArrayListUnmanaged(comptime T: type) type {
+    return ArrayListAlignedUnmanaged(T, null);
+}
+
+pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) type {
+    if (alignment) |a| {
+        if (a == @alignOf(T)) {
+            return ArrayListAlignedUnmanaged(T, null);
+        }
+    }
+    return struct {
+        const Self = @This();
+
+        /// Content of the ArrayList.
+        items: Slice = &[_]T{},
+        capacity: usize = 0,
+
+        pub const Slice = if (alignment) |a| ([]align(a) T) else []T;
+        pub const SliceConst = if (alignment) |a| ([]align(a) const T) else []const T;
+
+        /// Initialize with capacity to hold at least num elements.
+        /// Deinitialize with `deinit` or use `toOwnedSlice`.
+        pub fn initCapacity(allocator: *Allocator, num: usize) !Self {
+            var self = Self{};
+            try self.ensureCapacity(allocator, num);
+            return self;
+        }
+
+        /// Release all allocated memory.
+ pub fn deinit(self: *Self, allocator: *Allocator) void { + allocator.free(self.allocatedSlice()); + self.* = undefined; + } + + pub fn toManaged(self: *Self, allocator: *Allocator) ArrayListAligned(T, alignment) { + return .{ .items = self.items, .capacity = self.capacity, .allocator = allocator }; + } + + /// The caller owns the returned memory. ArrayList becomes empty. + pub fn toOwnedSlice(self: *Self, allocator: *Allocator) Slice { + const result = allocator.shrink(self.allocatedSlice(), self.items.len); + self.* = Self{}; + return result; + } + + /// Insert `item` at index `n`. Moves `list[n .. list.len]` + /// to make room. + pub fn insert(self: *Self, allocator: *Allocator, n: usize, item: T) !void { + try self.ensureCapacity(allocator, self.items.len + 1); + self.items.len += 1; + + mem.copyBackwards(T, self.items[n + 1 .. self.items.len], self.items[n .. self.items.len - 1]); + self.items[n] = item; + } + + /// Insert slice `items` at index `i`. Moves + /// `list[i .. list.len]` to make room. + /// This operation is O(N). + pub fn insertSlice(self: *Self, allocator: *Allocator, i: usize, items: SliceConst) !void { + try self.ensureCapacity(allocator, self.items.len + items.len); + self.items.len += items.len; + + mem.copyBackwards(T, self.items[i + items.len .. self.items.len], self.items[i .. self.items.len - items.len]); + mem.copy(T, self.items[i .. i + items.len], items); + } + + /// Extend the list by 1 element. Allocates more memory as necessary. + pub fn append(self: *Self, allocator: *Allocator, item: T) !void { + const new_item_ptr = try self.addOne(allocator); + new_item_ptr.* = item; + } + + /// Extend the list by 1 element, but asserting `self.capacity` + /// is sufficient to hold an additional item. + pub fn appendAssumeCapacity(self: *Self, item: T) void { + const new_item_ptr = self.addOneAssumeCapacity(); + new_item_ptr.* = item; + } + + /// Remove the element at index `i` from the list and return its value. + /// Asserts the array has at least one item. + /// This operation is O(N). + pub fn orderedRemove(self: *Self, i: usize) T { + const newlen = self.items.len - 1; + if (newlen == i) return self.pop(); + + const old_item = self.items[i]; + for (self.items[i..newlen]) |*b, j| b.* = self.items[i + 1 + j]; + self.items[newlen] = undefined; + self.items.len = newlen; + return old_item; + } + + /// Removes the element at the specified index and returns it. + /// The empty slot is filled from the end of the list. + /// This operation is O(1). + pub fn swapRemove(self: *Self, i: usize) T { + if (self.items.len - 1 == i) return self.pop(); + + const old_item = self.items[i]; + self.items[i] = self.pop(); + return old_item; + } + + /// Append the slice of items to the list. Allocates more + /// memory as necessary. + pub fn appendSlice(self: *Self, allocator: *Allocator, items: SliceConst) !void { + try self.ensureCapacity(allocator, self.items.len + items.len); + self.appendSliceAssumeCapacity(items); + } + + /// Append the slice of items to the list, asserting the capacity is enough + /// to store the new items. + pub fn appendSliceAssumeCapacity(self: *Self, items: SliceConst) void { + const oldlen = self.items.len; + const newlen = self.items.len + items.len; + + self.items.len = newlen; + mem.copy(T, self.items[oldlen..], items); + } + + /// Same as `append` except it returns the number of bytes written, which is always the same + /// as `m.len`. The purpose of this function existing is to match `std.io.OutStream` API. 
+        /// This function may be called only when `T` is `u8`.
+        fn appendWrite(self: *Self, allocator: *Allocator, m: []const u8) !usize {
+            try self.appendSlice(allocator, m);
+            return m.len;
+        }
+
+        /// Append a value to the list `n` times.
+        /// Allocates more memory as necessary.
+        pub fn appendNTimes(self: *Self, allocator: *Allocator, value: T, n: usize) !void {
+            const old_len = self.items.len;
+            try self.resize(allocator, self.items.len + n);
+            mem.set(T, self.items[old_len..self.items.len], value);
+        }
+
+        /// Adjust the list's length to `new_len`.
+        /// Does not initialize added items if any.
+        pub fn resize(self: *Self, allocator: *Allocator, new_len: usize) !void {
+            try self.ensureCapacity(allocator, new_len);
+            self.items.len = new_len;
+        }
+
+        /// Reduce allocated capacity to `new_len`.
+        /// Invalidates element pointers.
+        pub fn shrink(self: *Self, allocator: *Allocator, new_len: usize) void {
+            assert(new_len <= self.items.len);
+
+            self.items = allocator.realloc(self.allocatedSlice(), new_len) catch |e| switch (e) {
+                error.OutOfMemory => { // no problem, capacity is still correct then.
+                    self.items.len = new_len;
+                    return;
+                },
+            };
+            self.capacity = new_len;
+        }
+
+        pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void {
+            var better_capacity = self.capacity;
+            if (better_capacity >= new_capacity) return;
+
+            while (true) {
+                better_capacity += better_capacity / 2 + 8;
+                if (better_capacity >= new_capacity) break;
+            }
+
+            const new_memory = try allocator.realloc(self.allocatedSlice(), better_capacity);
+            self.items.ptr = new_memory.ptr;
+            self.capacity = new_memory.len;
+        }
+
+        /// Increases the array's length to match the full capacity that is already allocated.
+        /// The new elements have `undefined` values.
+        /// This operation does not invalidate any element pointers.
+        pub fn expandToCapacity(self: *Self) void {
+            self.items.len = self.capacity;
+        }
+
+        /// Increase length by 1, returning pointer to the new item.
+        /// The returned pointer becomes invalid when the list is resized.
+        pub fn addOne(self: *Self, allocator: *Allocator) !*T {
+            const newlen = self.items.len + 1;
+            try self.ensureCapacity(allocator, newlen);
+            return self.addOneAssumeCapacity();
+        }
+
+        /// Increase length by 1, returning pointer to the new item.
+        /// Asserts that there is already space for the new item without allocating more.
+        /// The returned pointer becomes invalid when the list is resized.
+        /// This operation does not invalidate any element pointers.
+        pub fn addOneAssumeCapacity(self: *Self) *T {
+            assert(self.items.len < self.capacity);
+
+            self.items.len += 1;
+            return &self.items[self.items.len - 1];
+        }
+
+        /// Remove and return the last element from the list.
+        /// Asserts the list has at least one item.
+        /// This operation does not invalidate any element pointers.
+        pub fn pop(self: *Self) T {
+            const val = self.items[self.items.len - 1];
+            self.items.len -= 1;
+            return val;
+        }
+
+        /// Remove and return the last element from the list.
+        /// If the list is empty, returns `null`.
+        /// This operation does not invalidate any element pointers.
+        pub fn popOrNull(self: *Self) ?T {
+            if (self.items.len == 0) return null;
+            return self.pop();
+        }
+
+        /// For a nicer API, `items.len` is the length, not the capacity.
+        /// This requires "unsafe" slicing.
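+        // A usage sketch of the unmanaged variant (illustrative only; `allocator`
+        // here stands for whatever `*Allocator` the caller already owns):
+        //
+        //   var list = ArrayListUnmanaged(u8){};
+        //   defer list.deinit(allocator);
+        //   try list.appendSlice(allocator, "hello");
+        //   const owned = list.toOwnedSlice(allocator);
+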
+ fn allocatedSlice(self: Self) Slice { + return self.items.ptr[0..self.capacity]; + } + }; +} + test "std.ArrayList.init" { var list = ArrayList(i32).init(testing.allocator); defer list.deinit(); diff --git a/lib/std/fifo.zig b/lib/std/fifo.zig index 6bbec57072..de75130363 100644 --- a/lib/std/fifo.zig +++ b/lib/std/fifo.zig @@ -191,8 +191,8 @@ pub fn LinearFifo( } /// Read the next item from the fifo - pub fn readItem(self: *Self) !T { - if (self.count == 0) return error.EndOfStream; + pub fn readItem(self: *Self) ?T { + if (self.count == 0) return null; const c = self.buf[self.head]; self.discard(1); @@ -282,7 +282,10 @@ pub fn LinearFifo( /// Write a single item to the fifo pub fn writeItem(self: *Self, item: T) !void { try self.ensureUnusedCapacity(1); + return self.writeItemAssumeCapacity(item); + } + pub fn writeItemAssumeCapacity(self: *Self, item: T) void { var tail = self.head + self.count; if (powers_of_two) { tail &= self.buf.len - 1; @@ -342,10 +345,10 @@ pub fn LinearFifo( } } - /// Peek at the item at `offset` - pub fn peekItem(self: Self, offset: usize) error{EndOfStream}!T { - if (offset >= self.count) - return error.EndOfStream; + /// Returns the item at `offset`. + /// Asserts offset is within bounds. + pub fn peekItem(self: Self, offset: usize) T { + assert(offset < self.count); var index = self.head + offset; if (powers_of_two) { @@ -369,18 +372,18 @@ test "LinearFifo(u8, .Dynamic)" { { var i: usize = 0; while (i < 5) : (i += 1) { - try fifo.write(&[_]u8{try fifo.peekItem(i)}); + try fifo.write(&[_]u8{fifo.peekItem(i)}); } testing.expectEqual(@as(usize, 10), fifo.readableLength()); testing.expectEqualSlices(u8, "HELLOHELLO", fifo.readableSlice(0)); } { - testing.expectEqual(@as(u8, 'H'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'E'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'L'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'L'), try fifo.readItem()); - testing.expectEqual(@as(u8, 'O'), try fifo.readItem()); + testing.expectEqual(@as(u8, 'H'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'E'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'L'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'L'), fifo.readItem().?); + testing.expectEqual(@as(u8, 'O'), fifo.readItem().?); } testing.expectEqual(@as(usize, 5), fifo.readableLength()); @@ -451,11 +454,11 @@ test "LinearFifo" { testing.expectEqual(@as(usize, 5), fifo.readableLength()); { - testing.expectEqual(@as(T, 0), try fifo.readItem()); - testing.expectEqual(@as(T, 1), try fifo.readItem()); - testing.expectEqual(@as(T, 1), try fifo.readItem()); - testing.expectEqual(@as(T, 0), try fifo.readItem()); - testing.expectEqual(@as(T, 1), try fifo.readItem()); + testing.expectEqual(@as(T, 0), fifo.readItem().?); + testing.expectEqual(@as(T, 1), fifo.readItem().?); + testing.expectEqual(@as(T, 1), fifo.readItem().?); + testing.expectEqual(@as(T, 0), fifo.readItem().?); + testing.expectEqual(@as(T, 1), fifo.readItem().?); testing.expectEqual(@as(usize, 0), fifo.readableLength()); } diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index 383a2be12d..3ea147679d 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -527,6 +527,33 @@ pub const File = struct { } } + pub const CopyRangeError = PWriteError || PReadError; + + pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) CopyRangeError!usize { + // TODO take advantage of copy_file_range OS APIs + var buf: [8 * 4096]u8 = undefined; + const adjusted_count = math.min(buf.len, len); + const amt_read = 
try in.pread(buf[0..adjusted_count], in_offset);
+        if (amt_read == 0) return @as(usize, 0);
+        return out.pwrite(buf[0..amt_read], out_offset);
+    }
+
+    /// Returns the number of bytes copied. If this is smaller than `len`, it means
+    /// the in file reached the end. Reaching the end of a file is not an error condition.
+    pub fn copyRangeAll(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) CopyRangeError!usize {
+        var total_bytes_copied: usize = 0;
+        var in_off = in_offset;
+        var out_off = out_offset;
+        while (total_bytes_copied < len) {
+            const amt_copied = try copyRange(in, in_off, out, out_off, len - total_bytes_copied);
+            if (amt_copied == 0) return total_bytes_copied;
+            total_bytes_copied += amt_copied;
+            in_off += amt_copied;
+            out_off += amt_copied;
+        }
+        return total_bytes_copied;
+    }
+
     pub const WriteFileOptions = struct {
         in_offset: u64 = 0,
diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig
index 47cc2f91e9..e2addb9b38 100644
--- a/lib/std/hash_map.zig
+++ b/lib/std/hash_map.zig
@@ -10,7 +10,7 @@ const Wyhash = std.hash.Wyhash;
 const Allocator = mem.Allocator;
 const builtin = @import("builtin");
 
-const want_modification_safety = builtin.mode != .ReleaseFast;
+const want_modification_safety = std.debug.runtime_safety;
 const debug_u32 = if (want_modification_safety) u32 else void;
 
 pub fn AutoHashMap(comptime K: type, comptime V: type) type {
@@ -219,6 +219,10 @@ pub fn HashMap(comptime K: type, comptime V: type, comptime hash: fn (key: K) u3
             return put_result.old_kv;
         }
 
+        pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
+            assert(self.putAssumeCapacity(key, value) == null);
+        }
+
         pub fn get(hm: *const Self, key: K) ?*KV {
             if (hm.entries.len == 0) {
                 return null;
diff --git a/lib/std/heap.zig b/lib/std/heap.zig
index 3e00ca5d59..6bbb688ef0 100644
--- a/lib/std/heap.zig
+++ b/lib/std/heap.zig
@@ -11,6 +11,7 @@ const maxInt = std.math.maxInt;
 
 pub const LoggingAllocator = @import("heap/logging_allocator.zig").LoggingAllocator;
 pub const loggingAllocator = @import("heap/logging_allocator.zig").loggingAllocator;
+pub const ArenaAllocator = @import("heap/arena_allocator.zig").ArenaAllocator;
 
 const Allocator = mem.Allocator;
 
@@ -510,95 +511,6 @@ pub const HeapAllocator = switch (builtin.os.tag) {
     else => @compileError("Unsupported OS"),
 };
 
-/// This allocator takes an existing allocator, wraps it, and provides an interface
-/// where you can allocate without freeing, and then free it all together.
-pub const ArenaAllocator = struct { - allocator: Allocator, - - child_allocator: *Allocator, - buffer_list: std.SinglyLinkedList([]u8), - end_index: usize, - - const BufNode = std.SinglyLinkedList([]u8).Node; - - pub fn init(child_allocator: *Allocator) ArenaAllocator { - return ArenaAllocator{ - .allocator = Allocator{ - .reallocFn = realloc, - .shrinkFn = shrink, - }, - .child_allocator = child_allocator, - .buffer_list = std.SinglyLinkedList([]u8).init(), - .end_index = 0, - }; - } - - pub fn deinit(self: ArenaAllocator) void { - var it = self.buffer_list.first; - while (it) |node| { - // this has to occur before the free because the free frees node - const next_it = node.next; - self.child_allocator.free(node.data); - it = next_it; - } - } - - fn createNode(self: *ArenaAllocator, prev_len: usize, minimum_size: usize) !*BufNode { - const actual_min_size = minimum_size + @sizeOf(BufNode); - var len = prev_len; - while (true) { - len += len / 2; - len += mem.page_size - @rem(len, mem.page_size); - if (len >= actual_min_size) break; - } - const buf = try self.child_allocator.alignedAlloc(u8, @alignOf(BufNode), len); - const buf_node_slice = mem.bytesAsSlice(BufNode, buf[0..@sizeOf(BufNode)]); - const buf_node = &buf_node_slice[0]; - buf_node.* = BufNode{ - .data = buf, - .next = null, - }; - self.buffer_list.prepend(buf_node); - self.end_index = 0; - return buf_node; - } - - fn alloc(allocator: *Allocator, n: usize, alignment: u29) ![]u8 { - const self = @fieldParentPtr(ArenaAllocator, "allocator", allocator); - - var cur_node = if (self.buffer_list.first) |first_node| first_node else try self.createNode(0, n + alignment); - while (true) { - const cur_buf = cur_node.data[@sizeOf(BufNode)..]; - const addr = @ptrToInt(cur_buf.ptr) + self.end_index; - const adjusted_addr = mem.alignForward(addr, alignment); - const adjusted_index = self.end_index + (adjusted_addr - addr); - const new_end_index = adjusted_index + n; - if (new_end_index > cur_buf.len) { - cur_node = try self.createNode(cur_buf.len, n + alignment); - continue; - } - const result = cur_buf[adjusted_index..new_end_index]; - self.end_index = new_end_index; - return result; - } - } - - fn realloc(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) ![]u8 { - if (new_size <= old_mem.len and new_align <= new_size) { - // We can't do anything with the memory, so tell the client to keep it. - return error.OutOfMemory; - } else { - const result = try alloc(allocator, new_size, new_align); - @memcpy(result.ptr, old_mem.ptr, std.math.min(old_mem.len, result.len)); - return result; - } - } - - fn shrink(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) []u8 { - return old_mem[0..new_size]; - } -}; - pub const FixedBufferAllocator = struct { allocator: Allocator, end_index: usize, diff --git a/lib/std/heap/arena_allocator.zig b/lib/std/heap/arena_allocator.zig new file mode 100644 index 0000000000..daed17d6b3 --- /dev/null +++ b/lib/std/heap/arena_allocator.zig @@ -0,0 +1,102 @@ +const std = @import("../std.zig"); +const assert = std.debug.assert; +const mem = std.mem; +const Allocator = std.mem.Allocator; + +/// This allocator takes an existing allocator, wraps it, and provides an interface +/// where you can allocate without freeing, and then free it all together. +pub const ArenaAllocator = struct { + allocator: Allocator, + + child_allocator: *Allocator, + state: State, + + /// Inner state of ArenaAllocator. 
Can be stored rather than the entire ArenaAllocator + /// as a memory-saving optimization. + pub const State = struct { + buffer_list: std.SinglyLinkedList([]u8) = @as(std.SinglyLinkedList([]u8), .{}), + end_index: usize = 0, + + pub fn promote(self: State, child_allocator: *Allocator) ArenaAllocator { + return .{ + .allocator = Allocator{ + .reallocFn = realloc, + .shrinkFn = shrink, + }, + .child_allocator = child_allocator, + .state = self, + }; + } + }; + + const BufNode = std.SinglyLinkedList([]u8).Node; + + pub fn init(child_allocator: *Allocator) ArenaAllocator { + return (State{}).promote(child_allocator); + } + + pub fn deinit(self: ArenaAllocator) void { + var it = self.state.buffer_list.first; + while (it) |node| { + // this has to occur before the free because the free frees node + const next_it = node.next; + self.child_allocator.free(node.data); + it = next_it; + } + } + + fn createNode(self: *ArenaAllocator, prev_len: usize, minimum_size: usize) !*BufNode { + const actual_min_size = minimum_size + @sizeOf(BufNode); + var len = prev_len; + while (true) { + len += len / 2; + len += mem.page_size - @rem(len, mem.page_size); + if (len >= actual_min_size) break; + } + const buf = try self.child_allocator.alignedAlloc(u8, @alignOf(BufNode), len); + const buf_node_slice = mem.bytesAsSlice(BufNode, buf[0..@sizeOf(BufNode)]); + const buf_node = &buf_node_slice[0]; + buf_node.* = BufNode{ + .data = buf, + .next = null, + }; + self.state.buffer_list.prepend(buf_node); + self.state.end_index = 0; + return buf_node; + } + + fn alloc(allocator: *Allocator, n: usize, alignment: u29) ![]u8 { + const self = @fieldParentPtr(ArenaAllocator, "allocator", allocator); + + var cur_node = if (self.state.buffer_list.first) |first_node| first_node else try self.createNode(0, n + alignment); + while (true) { + const cur_buf = cur_node.data[@sizeOf(BufNode)..]; + const addr = @ptrToInt(cur_buf.ptr) + self.state.end_index; + const adjusted_addr = mem.alignForward(addr, alignment); + const adjusted_index = self.state.end_index + (adjusted_addr - addr); + const new_end_index = adjusted_index + n; + if (new_end_index > cur_buf.len) { + cur_node = try self.createNode(cur_buf.len, n + alignment); + continue; + } + const result = cur_buf[adjusted_index..new_end_index]; + self.state.end_index = new_end_index; + return result; + } + } + + fn realloc(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) ![]u8 { + if (new_size <= old_mem.len and new_align <= new_size) { + // We can't do anything with the memory, so tell the client to keep it. + return error.OutOfMemory; + } else { + const result = try alloc(allocator, new_size, new_align); + @memcpy(result.ptr, old_mem.ptr, std.math.min(old_mem.len, result.len)); + return result; + } + } + + fn shrink(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) []u8 { + return old_mem[0..new_size]; + } +}; diff --git a/lib/std/linked_list.zig b/lib/std/linked_list.zig index 23201dbf94..50acbb2c9f 100644 --- a/lib/std/linked_list.zig +++ b/lib/std/linked_list.zig @@ -49,7 +49,7 @@ pub fn SinglyLinkedList(comptime T: type) type { } }; - first: ?*Node, + first: ?*Node = null, /// Initialize a linked list. 
/// diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 86fe85753a..cfd3fd38d8 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -279,6 +279,21 @@ pub const Allocator = struct { const shrink_result = self.shrinkFn(self, non_const_ptr[0..bytes_len], Slice.alignment, 0, 1); assert(shrink_result.len == 0); } + + /// Copies `m` to newly allocated memory. Caller owns the memory. + pub fn dupe(allocator: *Allocator, comptime T: type, m: []const T) ![]T { + const new_buf = try allocator.alloc(T, m.len); + copy(T, new_buf, m); + return new_buf; + } + + /// Copies `m` to newly allocated memory, with a null-terminated element. Caller owns the memory. + pub fn dupeZ(allocator: *Allocator, comptime T: type, m: []const T) ![:0]T { + const new_buf = try allocator.alloc(T, m.len + 1); + copy(T, new_buf, m); + new_buf[m.len] = 0; + return new_buf[0..m.len :0]; + } }; var failAllocator = Allocator { @@ -785,19 +800,14 @@ pub fn allEqual(comptime T: type, slice: []const T, scalar: T) bool { return true; } -/// Copies `m` to newly allocated memory. Caller owns the memory. +/// Deprecated, use `Allocator.dupe`. pub fn dupe(allocator: *Allocator, comptime T: type, m: []const T) ![]T { - const new_buf = try allocator.alloc(T, m.len); - copy(T, new_buf, m); - return new_buf; + return allocator.dupe(T, m); } -/// Copies `m` to newly allocated memory, with a null-terminated element. Caller owns the memory. +/// Deprecated, use `Allocator.dupeZ`. pub fn dupeZ(allocator: *Allocator, comptime T: type, m: []const T) ![:0]T { - const new_buf = try allocator.alloc(T, m.len + 1); - copy(T, new_buf, m); - new_buf[m.len] = 0; - return new_buf[0..m.len :0]; + return allocator.dupeZ(T, m); } /// Remove values from the beginning of a slice. @@ -2112,7 +2122,11 @@ pub fn alignBackwardGeneric(comptime T: type, addr: T, alignment: T) T { /// Given an address and an alignment, return true if the address is a multiple of the alignment /// The alignment must be a power of 2 and greater than 0. 
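// For example (illustrative): isAligned(0x2000, 0x1000) is true, isAligned(0x2004, 0x1000)
// is false, and isAlignedGeneric(u32, addr, alignment) performs the same check for any
// unsigned integer type.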
pub fn isAligned(addr: usize, alignment: usize) bool { - return alignBackward(addr, alignment) == addr; + return isAlignedGeneric(u64, addr, alignment); +} + +pub fn isAlignedGeneric(comptime T: type, addr: T, alignment: T) bool { + return alignBackwardGeneric(T, addr, alignment) == addr; } test "isAligned" { diff --git a/lib/std/std.zig b/lib/std/std.zig index cd6f347429..376c200200 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -1,6 +1,8 @@ -pub const AlignedArrayList = @import("array_list.zig").AlignedArrayList; pub const ArrayList = @import("array_list.zig").ArrayList; +pub const ArrayListAligned = @import("array_list.zig").ArrayListAligned; +pub const ArrayListAlignedUnmanaged = @import("array_list.zig").ArrayListAlignedUnmanaged; pub const ArrayListSentineled = @import("array_list_sentineled.zig").ArrayListSentineled; +pub const ArrayListUnmanaged = @import("array_list.zig").ArrayListUnmanaged; pub const AutoHashMap = @import("hash_map.zig").AutoHashMap; pub const BloomFilter = @import("bloom_filter.zig").BloomFilter; pub const BufMap = @import("buf_map.zig").BufMap; diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig new file mode 100644 index 0000000000..1ff551ecbf --- /dev/null +++ b/src-self-hosted/Module.zig @@ -0,0 +1,2091 @@ +const std = @import("std"); +const mem = std.mem; +const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; +const Value = @import("value.zig").Value; +const Type = @import("type.zig").Type; +const TypedValue = @import("TypedValue.zig"); +const assert = std.debug.assert; +const BigIntConst = std.math.big.int.Const; +const BigIntMutable = std.math.big.int.Mutable; +const Target = std.Target; +const Package = @import("Package.zig"); +const link = @import("link.zig"); +const ir = @import("ir.zig"); +const zir = @import("zir.zig"); +const Module = @This(); +const Inst = ir.Inst; + +/// General-purpose allocator. +allocator: *Allocator, +/// Pointer to externally managed resource. +root_pkg: *Package, +/// Module owns this resource. +root_scope: *Scope.ZIRModule, +bin_file: link.ElfFile, +bin_file_dir: std.fs.Dir, +bin_file_path: []const u8, +/// It's rare for a decl to be exported, so we save memory by having a sparse map of +/// Decl pointers to details about them being exported. +/// The Export memory is owned by the `export_owners` table; the slice itself is owned by this table. +decl_exports: std.AutoHashMap(*Decl, []*Export), +/// This models the Decls that perform exports, so that `decl_exports` can be updated when a Decl +/// is modified. Note that the key of this table is not the Decl being exported, but the Decl that +/// is performing the export of another Decl. +/// This table owns the Export memory. +export_owners: std.AutoHashMap(*Decl, []*Export), +/// Maps fully qualified namespaced names to the Decl struct for them. +decl_table: std.AutoHashMap(Decl.Hash, *Decl), + +optimize_mode: std.builtin.Mode, +link_error_flags: link.ElfFile.ErrorFlags = link.ElfFile.ErrorFlags{}, + +work_queue: std.fifo.LinearFifo(WorkItem, .Dynamic), + +/// We optimize memory usage for a compilation with no compile errors by storing the +/// error messages and mapping outside of `Decl`. +/// The ErrorMsg memory is owned by the decl, using Module's allocator. +/// Note that a Decl can succeed but the Fn it represents can fail. In this case, +/// a Decl can have a failed_decls entry but have analysis status of success. 
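+/// (Concrete example of that last case: a function Decl whose signature analyzes
+/// cleanly reaches analysis == .complete, but if its body later fails, the ErrorMsg
+/// recorded here is keyed by that same, otherwise-successful Decl.)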
+failed_decls: std.AutoHashMap(*Decl, *ErrorMsg), +/// Using a map here for consistency with the other fields here. +/// The ErrorMsg memory is owned by the `Scope.ZIRModule`, using Module's allocator. +failed_files: std.AutoHashMap(*Scope.ZIRModule, *ErrorMsg), +/// Using a map here for consistency with the other fields here. +/// The ErrorMsg memory is owned by the `Export`, using Module's allocator. +failed_exports: std.AutoHashMap(*Export, *ErrorMsg), + +pub const WorkItem = union(enum) { + /// Write the machine code for a Decl to the output file. + codegen_decl: *Decl, +}; + +pub const Export = struct { + options: std.builtin.ExportOptions, + /// Byte offset into the file that contains the export directive. + src: usize, + /// Represents the position of the export, if any, in the output file. + link: link.ElfFile.Export, + /// The Decl that performs the export. Note that this is *not* the Decl being exported. + owner_decl: *Decl, + status: enum { + in_progress, + failed, + /// Indicates that the failure was due to a temporary issue, such as an I/O error + /// when writing to the output file. Retrying the export may succeed. + failed_retryable, + complete, + }, +}; + +pub const Decl = struct { + /// This name is relative to the containing namespace of the decl. It uses a null-termination + /// to save bytes, since there can be a lot of decls in a compilation. The null byte is not allowed + /// in symbol names, because executable file formats use null-terminated strings for symbol names. + /// All Decls have names, even values that are not bound to a zig namespace. This is necessary for + /// mapping them to an address in the output file. + /// Memory owned by this decl, using Module's allocator. + name: [*:0]const u8, + /// The direct parent container of the Decl. This field will need to get more fleshed out when + /// self-hosted supports proper struct types and Zig AST => ZIR. + /// Reference to externally owned memory. + scope: *Scope.ZIRModule, + /// Byte offset into the source file that contains this declaration. + /// This is the base offset that src offsets within this Decl are relative to. + src: usize, + /// The most recent value of the Decl after a successful semantic analysis. + /// The tag for this union is determined by the tag value of the analysis field. + typed_value: union { + never_succeeded: void, + most_recent: TypedValue.Managed, + }, + /// Represents the "shallow" analysis status. For example, for decls that are functions, + /// the function type is analyzed with this set to `in_progress`, however, the semantic + /// analysis of the function body is performed with this value set to `success`. Functions + /// have their own analysis status field. + analysis: enum { + initial_in_progress, + /// This Decl might be OK but it depends on another one which did not successfully complete + /// semantic analysis. This Decl never had a value computed. + initial_dependency_failure, + /// Semantic analysis failure. This Decl never had a value computed. + /// There will be a corresponding ErrorMsg in Module.failed_decls. + initial_sema_failure, + /// In this case the `typed_value.most_recent` can still be accessed. + /// There will be a corresponding ErrorMsg in Module.failed_decls. + codegen_failure, + /// In this case the `typed_value.most_recent` can still be accessed. + /// There will be a corresponding ErrorMsg in Module.failed_decls. + /// This indicates the failure was something like running out of disk space, + /// and attempting codegen again may succeed. 
+        codegen_failure_retryable,
+        /// This Decl might be OK but it depends on another one which did not successfully complete
+        /// semantic analysis. There is a most recent value available.
+        repeat_dependency_failure,
+        /// Semantic analysis failure, but the `typed_value.most_recent` can be accessed.
+        /// There will be a corresponding ErrorMsg in Module.failed_decls.
+        repeat_sema_failure,
+        /// Completed successfully before; the `typed_value.most_recent` can be accessed, and
+        /// new semantic analysis is in progress.
+        repeat_in_progress,
+        /// Everything is done and updated.
+        complete,
+    },
+
+    /// Represents the position of the code in the output file.
+    /// This is populated regardless of semantic analysis and code generation.
+    link: link.ElfFile.Decl = link.ElfFile.Decl.empty,
+
+    /// The shallow set of other decls whose typed_value could possibly change if this Decl's
+    /// typed_value is modified.
+    /// TODO look into using a lightweight map/set data structure rather than a linear array.
+    dependants: ArrayListUnmanaged(*Decl) = ArrayListUnmanaged(*Decl){},
+
+    contents_hash: Hash,
+
+    pub fn destroy(self: *Decl, allocator: *Allocator) void {
+        allocator.free(mem.spanZ(self.name));
+        if (self.typedValueManaged()) |tvm| {
+            tvm.deinit(allocator);
+        }
+        allocator.destroy(self);
+    }
+
+    pub const Hash = [16]u8;
+
+    /// If the name is small enough, it is used directly as the hash.
+    /// If it is long, blake3 hash is computed.
+    pub fn hashSimpleName(name: []const u8) Hash {
+        var out: Hash = undefined;
+        if (name.len <= Hash.len) {
+            mem.copy(u8, &out, name);
+            mem.set(u8, out[name.len..], 0);
+        } else {
+            std.crypto.Blake3.hash(name, &out);
+        }
+        return out;
+    }
+
+    /// Must generate unique bytes with no collisions with other decls.
+    /// The point of hashing here is only to limit the number of bytes of
+    /// the unique identifier to a fixed size (16 bytes).
+    pub fn fullyQualifiedNameHash(self: Decl) Hash {
+        // Right now we only have ZIRModule as the source. So this is simply the
+        // relative name of the decl.
+        return hashSimpleName(mem.spanZ(self.name));
+    }
+
+    pub fn typedValue(self: *Decl) error{AnalysisFail}!TypedValue {
+        const tvm = self.typedValueManaged() orelse return error.AnalysisFail;
+        return tvm.typed_value;
+    }
+
+    pub fn value(self: *Decl) error{AnalysisFail}!Value {
+        return (try self.typedValue()).val;
+    }
+
+    pub fn dump(self: *Decl) void {
+        const loc = std.zig.findLineColumn(self.scope.source.bytes, self.src);
+        std.debug.warn("{}:{}:{} name={} status={}", .{
+            self.scope.sub_file_path,
+            loc.line + 1,
+            loc.column + 1,
+            mem.spanZ(self.name),
+            @tagName(self.analysis),
+        });
+        if (self.typedValueManaged()) |tvm| {
+            std.debug.warn(" ty={} val={}", .{ tvm.typed_value.ty, tvm.typed_value.val });
+        }
+        std.debug.warn("\n", .{});
+    }
+
+    fn typedValueManaged(self: *Decl) ?*TypedValue.Managed {
+        switch (self.analysis) {
+            .initial_in_progress,
+            .initial_dependency_failure,
+            .initial_sema_failure,
+            => return null,
+            .codegen_failure,
+            .codegen_failure_retryable,
+            .repeat_dependency_failure,
+            .repeat_sema_failure,
+            .repeat_in_progress,
+            .complete,
+            => return &self.typed_value.most_recent,
+        }
+    }
+};
+
+/// Fn struct memory is owned by the Decl's TypedValue.Managed arena allocator.
+pub const Fn = struct {
+    /// This memory is owned by the Decl's TypedValue.Managed arena allocator.
+    fn_type: Type,
+    analysis: union(enum) {
+        /// The value is the source instruction.
+ queued: *zir.Inst.Fn, + in_progress: *Analysis, + /// There will be a corresponding ErrorMsg in Module.failed_decls + sema_failure, + /// This Fn might be OK but it depends on another Decl which did not successfully complete + /// semantic analysis. + dependency_failure, + success: Body, + }, + + /// This memory is temporary and points to stack memory for the duration + /// of Fn analysis. + pub const Analysis = struct { + inner_block: Scope.Block, + /// TODO Performance optimization idea: instead of this inst_table, + /// use a field in the zir.Inst instead to track corresponding instructions + inst_table: std.AutoHashMap(*zir.Inst, *Inst), + needed_inst_capacity: usize, + }; +}; + +pub const Scope = struct { + tag: Tag, + + pub fn cast(base: *Scope, comptime T: type) ?*T { + if (base.tag != T.base_tag) + return null; + + return @fieldParentPtr(T, "base", base); + } + + /// Asserts the scope has a parent which is a DeclAnalysis and + /// returns the arena Allocator. + pub fn arena(self: *Scope) *Allocator { + switch (self.tag) { + .block => return self.cast(Block).?.arena, + .decl => return &self.cast(DeclAnalysis).?.arena.allocator, + .zir_module => return &self.cast(ZIRModule).?.contents.module.arena.allocator, + } + } + + /// Asserts the scope has a parent which is a DeclAnalysis and + /// returns the Decl. + pub fn decl(self: *Scope) *Decl { + switch (self.tag) { + .block => return self.cast(Block).?.decl, + .decl => return self.cast(DeclAnalysis).?.decl, + .zir_module => unreachable, + } + } + + /// Asserts the scope has a parent which is a ZIRModule and + /// returns it. + pub fn namespace(self: *Scope) *ZIRModule { + switch (self.tag) { + .block => return self.cast(Block).?.decl.scope, + .decl => return self.cast(DeclAnalysis).?.decl.scope, + .zir_module => return self.cast(ZIRModule).?, + } + } + + pub fn dumpInst(self: *Scope, inst: *Inst) void { + const zir_module = self.namespace(); + const loc = std.zig.findLineColumn(zir_module.source.bytes, inst.src); + std.debug.warn("{}:{}:{}: {}: ty={}\n", .{ + zir_module.sub_file_path, + loc.line + 1, + loc.column + 1, + @tagName(inst.tag), + inst.ty, + }); + } + + pub const Tag = enum { + zir_module, + block, + decl, + }; + + pub const ZIRModule = struct { + pub const base_tag: Tag = .zir_module; + base: Scope = Scope{ .tag = base_tag }, + /// Relative to the owning package's root_src_dir. + /// Reference to external memory, not owned by ZIRModule. 
+ sub_file_path: []const u8, + source: union(enum) { + unloaded: void, + bytes: [:0]const u8, + }, + contents: union { + not_available: void, + module: *zir.Module, + }, + status: enum { + never_loaded, + unloaded_success, + unloaded_parse_failure, + unloaded_sema_failure, + + loaded_sema_failure, + loaded_success, + }, + + pub fn unload(self: *ZIRModule, allocator: *Allocator) void { + switch (self.status) { + .never_loaded, + .unloaded_parse_failure, + .unloaded_sema_failure, + .unloaded_success, + => {}, + + .loaded_success => { + self.contents.module.deinit(allocator); + allocator.destroy(self.contents.module); + self.status = .unloaded_success; + }, + .loaded_sema_failure => { + self.contents.module.deinit(allocator); + allocator.destroy(self.contents.module); + self.status = .unloaded_sema_failure; + }, + } + switch (self.source) { + .bytes => |bytes| { + allocator.free(bytes); + self.source = .{ .unloaded = {} }; + }, + .unloaded => {}, + } + } + + pub fn deinit(self: *ZIRModule, allocator: *Allocator) void { + self.unload(allocator); + self.* = undefined; + } + + pub fn dumpSrc(self: *ZIRModule, src: usize) void { + const loc = std.zig.findLineColumn(self.source.bytes, src); + std.debug.warn("{}:{}:{}\n", .{ self.sub_file_path, loc.line + 1, loc.column + 1 }); + } + }; + + /// This is a temporary structure, references to it are valid only + /// during semantic analysis of the block. + pub const Block = struct { + pub const base_tag: Tag = .block; + base: Scope = Scope{ .tag = base_tag }, + func: *Fn, + decl: *Decl, + instructions: ArrayListUnmanaged(*Inst), + /// Points to the arena allocator of DeclAnalysis + arena: *Allocator, + }; + + /// This is a temporary structure, references to it are valid only + /// during semantic analysis of the decl. 
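+    /// (For example, `resolveDecl` below builds one of these on the stack, runs the
+    /// analysis against its arena, and afterwards only the arena state survives,
+    /// stored in the Decl's TypedValue.Managed.)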
+ pub const DeclAnalysis = struct { + pub const base_tag: Tag = .decl; + base: Scope = Scope{ .tag = base_tag }, + decl: *Decl, + arena: std.heap.ArenaAllocator, + }; +}; + +pub const Body = struct { + instructions: []*Inst, +}; + +pub const AllErrors = struct { + arena: std.heap.ArenaAllocator.State, + list: []const Message, + + pub const Message = struct { + src_path: []const u8, + line: usize, + column: usize, + byte_offset: usize, + msg: []const u8, + }; + + pub fn deinit(self: *AllErrors, allocator: *Allocator) void { + self.arena.promote(allocator).deinit(); + } + + fn add( + arena: *std.heap.ArenaAllocator, + errors: *std.ArrayList(Message), + sub_file_path: []const u8, + source: []const u8, + simple_err_msg: ErrorMsg, + ) !void { + const loc = std.zig.findLineColumn(source, simple_err_msg.byte_offset); + try errors.append(.{ + .src_path = try arena.allocator.dupe(u8, sub_file_path), + .msg = try arena.allocator.dupe(u8, simple_err_msg.msg), + .byte_offset = simple_err_msg.byte_offset, + .line = loc.line, + .column = loc.column, + }); + } +}; + +pub const InitOptions = struct { + target: std.Target, + root_pkg: *Package, + output_mode: std.builtin.OutputMode, + bin_file_dir: ?std.fs.Dir = null, + bin_file_path: []const u8, + link_mode: ?std.builtin.LinkMode = null, + object_format: ?std.builtin.ObjectFormat = null, + optimize_mode: std.builtin.Mode = .Debug, +}; + +pub fn init(gpa: *Allocator, options: InitOptions) !Module { + const root_scope = try gpa.create(Scope.ZIRModule); + errdefer gpa.destroy(root_scope); + + root_scope.* = .{ + .sub_file_path = options.root_pkg.root_src_path, + .source = .{ .unloaded = {} }, + .contents = .{ .not_available = {} }, + .status = .never_loaded, + }; + + const bin_file_dir = options.bin_file_dir orelse std.fs.cwd(); + var bin_file = try link.openBinFilePath(gpa, bin_file_dir, options.bin_file_path, .{ + .target = options.target, + .output_mode = options.output_mode, + .link_mode = options.link_mode orelse .Static, + .object_format = options.object_format orelse options.target.getObjectFormat(), + }); + errdefer bin_file.deinit(); + + return Module{ + .allocator = gpa, + .root_pkg = options.root_pkg, + .root_scope = root_scope, + .bin_file_dir = bin_file_dir, + .bin_file_path = options.bin_file_path, + .bin_file = bin_file, + .optimize_mode = options.optimize_mode, + .decl_table = std.AutoHashMap(Decl.Hash, *Decl).init(gpa), + .decl_exports = std.AutoHashMap(*Decl, []*Export).init(gpa), + .export_owners = std.AutoHashMap(*Decl, []*Export).init(gpa), + .failed_decls = std.AutoHashMap(*Decl, *ErrorMsg).init(gpa), + .failed_files = std.AutoHashMap(*Scope.ZIRModule, *ErrorMsg).init(gpa), + .failed_exports = std.AutoHashMap(*Export, *ErrorMsg).init(gpa), + .work_queue = std.fifo.LinearFifo(WorkItem, .Dynamic).init(gpa), + }; +} + +pub fn deinit(self: *Module) void { + self.bin_file.deinit(); + const allocator = self.allocator; + self.work_queue.deinit(); + { + var it = self.decl_table.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.decl_table.deinit(); + } + { + var it = self.failed_decls.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_decls.deinit(); + } + { + var it = self.failed_files.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_files.deinit(); + } + { + var it = self.failed_exports.iterator(); + while (it.next()) |kv| { + kv.value.destroy(allocator); + } + self.failed_exports.deinit(); + } + { + var it = self.decl_exports.iterator(); 
+        while (it.next()) |kv| {
+            const export_list = kv.value;
+            allocator.free(export_list);
+        }
+        self.decl_exports.deinit();
+    }
+    {
+        var it = self.export_owners.iterator();
+        while (it.next()) |kv| {
+            const export_list = kv.value;
+            for (export_list) |exp| {
+                allocator.destroy(exp);
+            }
+            allocator.free(export_list);
+        }
+        self.export_owners.deinit();
+    }
+    {
+        self.root_scope.deinit(allocator);
+        allocator.destroy(self.root_scope);
+    }
+    self.* = undefined;
+}
+
+pub fn target(self: Module) std.Target {
+    return self.bin_file.options.target;
+}
+
+/// Detect changes to source files, perform semantic analysis, and update the output files.
+pub fn update(self: *Module) !void {
+    // TODO Use the cache hash file system to detect which source files changed.
+    // Here we simulate a full cache miss.
+    // Analyze the root source file now.
+    self.analyzeRoot(self.root_scope) catch |err| switch (err) {
+        error.AnalysisFail => {
+            assert(self.totalErrorCount() != 0);
+        },
+        else => |e| return e,
+    };
+
+    try self.performAllTheWork();
+
+    // Unload all the source files from memory.
+    self.root_scope.unload(self.allocator);
+
+    try self.bin_file.flush();
+    self.link_error_flags = self.bin_file.error_flags;
+}
+
+/// Having the file open for writing is problematic as far as executing the
+/// binary is concerned. This will remove the write flag, or close the file,
+/// or whatever is needed so that it can be executed.
+/// After this, one must call `makeBinFileWritable` before calling `update`.
+pub fn makeBinFileExecutable(self: *Module) !void {
+    return self.bin_file.makeExecutable();
+}
+
+pub fn makeBinFileWritable(self: *Module) !void {
+    return self.bin_file.makeWritable(self.bin_file_dir, self.bin_file_path);
+}
+
+pub fn totalErrorCount(self: *Module) usize {
+    return self.failed_decls.size +
+        self.failed_files.size +
+        self.failed_exports.size +
+        @boolToInt(self.link_error_flags.no_entry_point_found);
+}
+
+pub fn getAllErrorsAlloc(self: *Module) !AllErrors {
+    var arena = std.heap.ArenaAllocator.init(self.allocator);
+    errdefer arena.deinit();
+
+    var errors = std.ArrayList(AllErrors.Message).init(self.allocator);
+    defer errors.deinit();
+
+    {
+        var it = self.failed_files.iterator();
+        while (it.next()) |kv| {
+            const scope = kv.key;
+            const err_msg = kv.value;
+            const source = try self.getSource(scope);
+            try AllErrors.add(&arena, &errors, scope.sub_file_path, source, err_msg.*);
+        }
+    }
+    {
+        var it = self.failed_decls.iterator();
+        while (it.next()) |kv| {
+            const decl = kv.key;
+            const err_msg = kv.value;
+            const source = try self.getSource(decl.scope);
+            try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*);
+        }
+    }
+    {
+        var it = self.failed_exports.iterator();
+        while (it.next()) |kv| {
+            const decl = kv.key.owner_decl;
+            const err_msg = kv.value;
+            const source = try self.getSource(decl.scope);
+            try AllErrors.add(&arena, &errors, decl.scope.sub_file_path, source, err_msg.*);
+        }
+    }
+
+    if (self.link_error_flags.no_entry_point_found) {
+        try errors.append(.{
+            .src_path = self.root_pkg.root_src_path,
+            .line = 0,
+            .column = 0,
+            .byte_offset = 0,
+            .msg = try std.fmt.allocPrint(&arena.allocator, "no entry point found", .{}),
+        });
+    }
+
+    assert(errors.items.len == self.totalErrorCount());
+
+    return AllErrors{
+        .arena = arena.state,
+        .list = try arena.allocator.dupe(AllErrors.Message, errors.items),
+    };
+}
+
+const InnerError = error{ OutOfMemory, AnalysisFail };
+
+pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void {
+    while
(self.work_queue.readItem()) |work_item| switch (work_item) { + .codegen_decl => |decl| switch (decl.analysis) { + .initial_in_progress, + .repeat_in_progress, + => unreachable, + + .initial_sema_failure, + .repeat_sema_failure, + .codegen_failure, + .initial_dependency_failure, + .repeat_dependency_failure, + => continue, + + .complete, .codegen_failure_retryable => { + if (decl.typed_value.most_recent.typed_value.val.cast(Value.Payload.Function)) |payload| { + switch (payload.func.analysis) { + .queued => self.analyzeFnBody(decl, payload.func) catch |err| switch (err) { + error.AnalysisFail => { + if (payload.func.analysis == .queued) { + payload.func.analysis = .dependency_failure; + } + continue; + }, + else => |e| return e, + }, + .in_progress => unreachable, + .sema_failure, .dependency_failure => continue, + .success => {}, + } + } + + assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()); + + self.bin_file.updateDecl(self, decl) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => { + decl.analysis = .repeat_dependency_failure; + }, + else => { + try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + self.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create( + self.allocator, + decl.src, + "unable to codegen: {}", + .{@errorName(err)}, + )); + decl.analysis = .codegen_failure_retryable; + }, + }; + }, + }, + }; +} + +fn getSource(self: *Module, root_scope: *Scope.ZIRModule) ![:0]const u8 { + switch (root_scope.source) { + .unloaded => { + const source = try self.root_pkg.root_src_dir.readFileAllocOptions( + self.allocator, + root_scope.sub_file_path, + std.math.maxInt(u32), + 1, + 0, + ); + root_scope.source = .{ .bytes = source }; + return source; + }, + .bytes => |bytes| return bytes, + } +} + +fn getSrcModule(self: *Module, root_scope: *Scope.ZIRModule) !*zir.Module { + switch (root_scope.status) { + .never_loaded, .unloaded_success => { + try self.failed_files.ensureCapacity(self.failed_files.size + 1); + + const source = try self.getSource(root_scope); + + var keep_zir_module = false; + const zir_module = try self.allocator.create(zir.Module); + defer if (!keep_zir_module) self.allocator.destroy(zir_module); + + zir_module.* = try zir.parse(self.allocator, source); + defer if (!keep_zir_module) zir_module.deinit(self.allocator); + + if (zir_module.error_msg) |src_err_msg| { + self.failed_files.putAssumeCapacityNoClobber( + root_scope, + try ErrorMsg.create(self.allocator, src_err_msg.byte_offset, "{}", .{src_err_msg.msg}), + ); + root_scope.status = .unloaded_parse_failure; + return error.AnalysisFail; + } + + root_scope.status = .loaded_success; + root_scope.contents = .{ .module = zir_module }; + keep_zir_module = true; + + return zir_module; + }, + + .unloaded_parse_failure, + .unloaded_sema_failure, + => return error.AnalysisFail, + + .loaded_success, .loaded_sema_failure => return root_scope.contents.module, + } +} + +fn analyzeRoot(self: *Module, root_scope: *Scope.ZIRModule) !void { + // TODO use the cache to identify, from the modified source files, the decls which have + // changed based on the span of memory that represents the decl in the re-parsed source file. + // Use the cached dependency graph to recursively determine the set of decls which need + // regeneration. + // Here we simulate adding a source file which was previously not part of the compilation, + // which means scanning the decls looking for exports. + // TODO also identify decls that need to be deleted. 
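+    //
+    // Rough shape of the switch below (a sketch, not normative):
+    //   .never_loaded => scan every top-level decl and resolve the `export`
+    //                    instructions, which queues codegen work items;
+    //   anything else => compare each decl's contents_hash against the cached
+    //                    Decl and re-resolve only the ones whose bytes changed.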
+ switch (root_scope.status) { + .never_loaded => { + const src_module = try self.getSrcModule(root_scope); + + // Here we ensure enough queue capacity to store all the decls, so that later we can use + // appendAssumeCapacity. + try self.work_queue.ensureUnusedCapacity(src_module.decls.len); + + for (src_module.decls) |decl| { + if (decl.cast(zir.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, + + .unloaded_parse_failure, + .unloaded_sema_failure, + .unloaded_success, + .loaded_sema_failure, + .loaded_success, + => { + const src_module = try self.getSrcModule(root_scope); + + // Look for changed decls. + for (src_module.decls) |src_decl| { + const name_hash = Decl.hashSimpleName(src_decl.name); + if (self.decl_table.get(name_hash)) |kv| { + const decl = kv.value; + const new_contents_hash = Decl.hashSimpleName(src_decl.contents); + if (!mem.eql(u8, &new_contents_hash, &decl.contents_hash)) { + // TODO recursive dependency management + //std.debug.warn("noticed that '{}' changed\n", .{src_decl.name}); + self.decl_table.removeAssertDiscard(name_hash); + const saved_link = decl.link; + decl.destroy(self.allocator); + if (self.export_owners.getValue(decl)) |exports| { + @panic("TODO handle updating a decl that does an export"); + } + const new_decl = self.resolveDecl( + &root_scope.base, + src_decl, + saved_link, + ) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => continue, + }; + if (self.decl_exports.remove(decl)) |entry| { + self.decl_exports.putAssumeCapacityNoClobber(new_decl, entry.value); + } + } + } else if (src_decl.cast(zir.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, + } +} + +fn analyzeFnBody(self: *Module, decl: *Decl, func: *Fn) !void { + // Use the Decl's arena for function memory. 
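+    // (This promote/demote pair is the ArenaAllocator.State round-trip: `promote`
+    // reattaches the child allocator, and the deferred store writes `arena.state`
+    // back so only the small State struct is kept long-term.)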
+ var arena = decl.typed_value.most_recent.arena.?.promote(self.allocator); + defer decl.typed_value.most_recent.arena.?.* = arena.state; + var analysis: Fn.Analysis = .{ + .inner_block = .{ + .func = func, + .decl = decl, + .instructions = .{}, + .arena = &arena.allocator, + }, + .needed_inst_capacity = 0, + .inst_table = std.AutoHashMap(*zir.Inst, *Inst).init(self.allocator), + }; + defer analysis.inner_block.instructions.deinit(self.allocator); + defer analysis.inst_table.deinit(); + + const fn_inst = func.analysis.queued; + func.analysis = .{ .in_progress = &analysis }; + + try self.analyzeBody(&analysis.inner_block.base, fn_inst.positionals.body); + + func.analysis = .{ + .success = .{ + .instructions = try arena.allocator.dupe(*Inst, analysis.inner_block.instructions.items), + }, + }; +} + +fn resolveDecl( + self: *Module, + scope: *Scope, + old_inst: *zir.Inst, + bin_file_link: link.ElfFile.Decl, +) InnerError!*Decl { + const hash = Decl.hashSimpleName(old_inst.name); + if (self.decl_table.get(hash)) |kv| { + return kv.value; + } else { + const new_decl = blk: { + try self.decl_table.ensureCapacity(self.decl_table.size + 1); + const new_decl = try self.allocator.create(Decl); + errdefer self.allocator.destroy(new_decl); + const name = try mem.dupeZ(self.allocator, u8, old_inst.name); + errdefer self.allocator.free(name); + new_decl.* = .{ + .name = name, + .scope = scope.namespace(), + .src = old_inst.src, + .typed_value = .{ .never_succeeded = {} }, + .analysis = .initial_in_progress, + .contents_hash = Decl.hashSimpleName(old_inst.contents), + .link = bin_file_link, + }; + self.decl_table.putAssumeCapacityNoClobber(hash, new_decl); + break :blk new_decl; + }; + + var decl_scope: Scope.DeclAnalysis = .{ + .decl = new_decl, + .arena = std.heap.ArenaAllocator.init(self.allocator), + }; + errdefer decl_scope.arena.deinit(); + + const typed_value = self.analyzeInstConst(&decl_scope.base, old_inst) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => { + switch (new_decl.analysis) { + .initial_in_progress => new_decl.analysis = .initial_dependency_failure, + .repeat_in_progress => new_decl.analysis = .repeat_dependency_failure, + else => {}, + } + return error.AnalysisFail; + }, + }; + const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); + + const has_codegen_bits = typed_value.ty.hasCodeGenBits(); + if (has_codegen_bits) { + // We don't fully codegen the decl until later, but we do need to reserve a global + // offset table index for it. This allows us to codegen decls out of dependency order, + // increasing how many computations can be done in parallel. + try self.bin_file.allocateDeclIndexes(new_decl); + } + + arena_state.* = decl_scope.arena.state; + + new_decl.typed_value = .{ + .most_recent = .{ + .typed_value = typed_value, + .arena = arena_state, + }, + }; + new_decl.analysis = .complete; + if (has_codegen_bits) { + // We ensureCapacity when scanning for decls. 
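+            // (analyzeRoot reserves one work-queue slot per decl up front via
+            // `ensureUnusedCapacity`, which is what makes this assume-capacity
+            // write safe.)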
+ self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); + } + return new_decl; + } +} + +fn resolveCompleteDecl(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!*Decl { + const decl = try self.resolveDecl(scope, old_inst, link.ElfFile.Decl.empty); + switch (decl.analysis) { + .initial_in_progress => unreachable, + .repeat_in_progress => unreachable, + .initial_dependency_failure, + .repeat_dependency_failure, + .initial_sema_failure, + .repeat_sema_failure, + .codegen_failure, + .codegen_failure_retryable, + => return error.AnalysisFail, + + .complete => return decl, + } +} + +fn resolveInst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!*Inst { + if (scope.cast(Scope.Block)) |block| { + if (block.func.analysis.in_progress.inst_table.get(old_inst)) |kv| { + return kv.value; + } + } + + const decl = try self.resolveCompleteDecl(scope, old_inst); + const decl_ref = try self.analyzeDeclRef(scope, old_inst.src, decl); + return self.analyzeDeref(scope, old_inst.src, decl_ref, old_inst.src); +} + +fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { + return scope.cast(Scope.Block) orelse + return self.fail(scope, src, "instruction illegal outside function body", .{}); +} + +fn resolveInstConst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!TypedValue { + const new_inst = try self.resolveInst(scope, old_inst); + const val = try self.resolveConstValue(scope, new_inst); + return TypedValue{ + .ty = new_inst.ty, + .val = val, + }; +} + +fn resolveConstValue(self: *Module, scope: *Scope, base: *Inst) !Value { + return (try self.resolveDefinedValue(scope, base)) orelse + return self.fail(scope, base.src, "unable to resolve comptime value", .{}); +} + +fn resolveDefinedValue(self: *Module, scope: *Scope, base: *Inst) !?Value { + if (base.value()) |val| { + if (val.isUndef()) { + return self.fail(scope, base.src, "use of undefined value here causes undefined behavior", .{}); + } + return val; + } + return null; +} + +fn resolveConstString(self: *Module, scope: *Scope, old_inst: *zir.Inst) ![]u8 { + const new_inst = try self.resolveInst(scope, old_inst); + const wanted_type = Type.initTag(.const_slice_u8); + const coerced_inst = try self.coerce(scope, wanted_type, new_inst); + const val = try self.resolveConstValue(scope, coerced_inst); + return val.toAllocatedBytes(scope.arena()); +} + +fn resolveType(self: *Module, scope: *Scope, old_inst: *zir.Inst) !Type { + const new_inst = try self.resolveInst(scope, old_inst); + const wanted_type = Type.initTag(.@"type"); + const coerced_inst = try self.coerce(scope, wanted_type, new_inst); + const val = try self.resolveConstValue(scope, coerced_inst); + return val.toType(); +} + +fn analyzeExport(self: *Module, scope: *Scope, export_inst: *zir.Inst.Export) InnerError!void { + try self.decl_exports.ensureCapacity(self.decl_exports.size + 1); + try self.export_owners.ensureCapacity(self.export_owners.size + 1); + const symbol_name = try self.resolveConstString(scope, export_inst.positionals.symbol_name); + const exported_decl = try self.resolveCompleteDecl(scope, export_inst.positionals.value); + const typed_value = exported_decl.typed_value.most_recent.typed_value; + switch (typed_value.ty.zigTypeTag()) { + .Fn => {}, + else => return self.fail( + scope, + export_inst.positionals.value.src, + "unable to export type '{}'", + .{typed_value.ty}, + ), + } + const new_export = try self.allocator.create(Export); + errdefer self.allocator.destroy(new_export); + + const owner_decl = 
scope.decl(); + + new_export.* = .{ + .options = .{ .name = symbol_name }, + .src = export_inst.base.src, + .link = .{}, + .owner_decl = owner_decl, + .status = .in_progress, + }; + + // Add to export_owners table. + const eo_gop = self.export_owners.getOrPut(owner_decl) catch unreachable; + if (!eo_gop.found_existing) { + eo_gop.kv.value = &[0]*Export{}; + } + eo_gop.kv.value = try self.allocator.realloc(eo_gop.kv.value, eo_gop.kv.value.len + 1); + eo_gop.kv.value[eo_gop.kv.value.len - 1] = new_export; + errdefer eo_gop.kv.value = self.allocator.shrink(eo_gop.kv.value, eo_gop.kv.value.len - 1); + + // Add to exported_decl table. + const de_gop = self.decl_exports.getOrPut(exported_decl) catch unreachable; + if (!de_gop.found_existing) { + de_gop.kv.value = &[0]*Export{}; + } + de_gop.kv.value = try self.allocator.realloc(de_gop.kv.value, de_gop.kv.value.len + 1); + de_gop.kv.value[de_gop.kv.value.len - 1] = new_export; + errdefer de_gop.kv.value = self.allocator.shrink(de_gop.kv.value, de_gop.kv.value.len - 1); + + self.bin_file.updateDeclExports(self, exported_decl, de_gop.kv.value) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + try self.failed_exports.ensureCapacity(self.failed_exports.size + 1); + self.failed_exports.putAssumeCapacityNoClobber(new_export, try ErrorMsg.create( + self.allocator, + export_inst.base.src, + "unable to export: {}", + .{@errorName(err)}, + )); + new_export.status = .failed_retryable; + }, + }; +} + +/// TODO should not need the cast on the last parameter at the callsites +fn addNewInstArgs( + self: *Module, + block: *Scope.Block, + src: usize, + ty: Type, + comptime T: type, + args: Inst.Args(T), +) !*Inst { + const inst = try self.addNewInst(block, src, ty, T); + inst.args = args; + return &inst.base; +} + +fn addNewInst(self: *Module, block: *Scope.Block, src: usize, ty: Type, comptime T: type) !*T { + const inst = try block.arena.create(T); + inst.* = .{ + .base = .{ + .tag = T.base_tag, + .ty = ty, + .src = src, + }, + .args = undefined, + }; + try block.instructions.append(self.allocator, &inst.base); + return inst; +} + +fn constInst(self: *Module, scope: *Scope, src: usize, typed_value: TypedValue) !*Inst { + const const_inst = try scope.arena().create(Inst.Constant); + const_inst.* = .{ + .base = .{ + .tag = Inst.Constant.base_tag, + .ty = typed_value.ty, + .src = src, + }, + .val = typed_value.val, + }; + return &const_inst.base; +} + +fn constStr(self: *Module, scope: *Scope, src: usize, str: []const u8) !*Inst { + const ty_payload = try scope.arena().create(Type.Payload.Array_u8_Sentinel0); + ty_payload.* = .{ .len = str.len }; + + const bytes_payload = try scope.arena().create(Value.Payload.Bytes); + bytes_payload.* = .{ .data = str }; + + return self.constInst(scope, src, .{ + .ty = Type.initPayload(&ty_payload.base), + .val = Value.initPayload(&bytes_payload.base), + }); +} + +fn constType(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { + return self.constInst(scope, src, .{ + .ty = Type.initTag(.type), + .val = try ty.toValue(scope.arena()), + }); +} + +fn constVoid(self: *Module, scope: *Scope, src: usize) !*Inst { + return self.constInst(scope, src, .{ + .ty = Type.initTag(.void), + .val = Value.initTag(.the_one_possible_value), + }); +} + +fn constUndef(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { + return self.constInst(scope, src, .{ + .ty = ty, + .val = Value.initTag(.undef), + }); +} + +fn constBool(self: *Module, scope: *Scope, src: usize, v: bool) !*Inst { + return 
self.constInst(scope, src, .{
+        .ty = Type.initTag(.bool),
+        .val = ([2]Value{ Value.initTag(.bool_false), Value.initTag(.bool_true) })[@boolToInt(v)],
+    });
+}
+
+fn constIntUnsigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: u64) !*Inst {
+    const int_payload = try scope.arena().create(Value.Payload.Int_u64);
+    int_payload.* = .{ .int = int };
+
+    return self.constInst(scope, src, .{
+        .ty = ty,
+        .val = Value.initPayload(&int_payload.base),
+    });
+}
+
+fn constIntSigned(self: *Module, scope: *Scope, src: usize, ty: Type, int: i64) !*Inst {
+    const int_payload = try scope.arena().create(Value.Payload.Int_i64);
+    int_payload.* = .{ .int = int };
+
+    return self.constInst(scope, src, .{
+        .ty = ty,
+        .val = Value.initPayload(&int_payload.base),
+    });
+}
+
+fn constIntBig(self: *Module, scope: *Scope, src: usize, ty: Type, big_int: BigIntConst) !*Inst {
+    const val_payload = if (big_int.positive) blk: {
+        if (big_int.to(u64)) |x| {
+            return self.constIntUnsigned(scope, src, ty, x);
+        } else |err| switch (err) {
+            error.NegativeIntoUnsigned => unreachable,
+            error.TargetTooSmall => {}, // handled below
+        }
+        const big_int_payload = try scope.arena().create(Value.Payload.IntBigPositive);
+        big_int_payload.* = .{ .limbs = big_int.limbs };
+        break :blk &big_int_payload.base;
+    } else blk: {
+        if (big_int.to(i64)) |x| {
+            return self.constIntSigned(scope, src, ty, x);
+        } else |err| switch (err) {
+            error.NegativeIntoUnsigned => unreachable,
+            error.TargetTooSmall => {}, // handled below
+        }
+        const big_int_payload = try scope.arena().create(Value.Payload.IntBigNegative);
+        big_int_payload.* = .{ .limbs = big_int.limbs };
+        break :blk &big_int_payload.base;
+    };
+
+    return self.constInst(scope, src, .{
+        .ty = ty,
+        .val = Value.initPayload(val_payload),
+    });
+}
+
+fn analyzeInstConst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!TypedValue {
+    const new_inst = try self.analyzeInst(scope, old_inst);
+    return TypedValue{
+        .ty = new_inst.ty,
+        .val = try self.resolveConstValue(scope, new_inst),
+    };
+}
+
+fn analyzeInst(self: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError!*Inst {
+    switch (old_inst.tag) {
+        .breakpoint => return self.analyzeInstBreakpoint(scope, old_inst.cast(zir.Inst.Breakpoint).?),
+        .call => return self.analyzeInstCall(scope, old_inst.cast(zir.Inst.Call).?),
+        .declref => return self.analyzeInstDeclRef(scope, old_inst.cast(zir.Inst.DeclRef).?),
+        .str => {
+            const bytes = old_inst.cast(zir.Inst.Str).?.positionals.bytes;
+            // The bytes reference memory inside the ZIR module, which can get deallocated
+            // after semantic analysis is complete. We need the memory to be in the Decl's arena.
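+            // E.g. for a ZIR decl such as `@msg = str("hello")` (illustrative), the
+            // "hello" bytes must outlive the ZIR text they were parsed from.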
+ const arena_bytes = try scope.arena().dupe(u8, bytes); + return self.constStr(scope, old_inst.src, arena_bytes); + }, + .int => { + const big_int = old_inst.cast(zir.Inst.Int).?.positionals.int; + return self.constIntBig(scope, old_inst.src, Type.initTag(.comptime_int), big_int); + }, + .ptrtoint => return self.analyzeInstPtrToInt(scope, old_inst.cast(zir.Inst.PtrToInt).?), + .fieldptr => return self.analyzeInstFieldPtr(scope, old_inst.cast(zir.Inst.FieldPtr).?), + .deref => return self.analyzeInstDeref(scope, old_inst.cast(zir.Inst.Deref).?), + .as => return self.analyzeInstAs(scope, old_inst.cast(zir.Inst.As).?), + .@"asm" => return self.analyzeInstAsm(scope, old_inst.cast(zir.Inst.Asm).?), + .@"unreachable" => return self.analyzeInstUnreachable(scope, old_inst.cast(zir.Inst.Unreachable).?), + .@"return" => return self.analyzeInstRet(scope, old_inst.cast(zir.Inst.Return).?), + .@"fn" => return self.analyzeInstFn(scope, old_inst.cast(zir.Inst.Fn).?), + .@"export" => { + try self.analyzeExport(scope, old_inst.cast(zir.Inst.Export).?); + return self.constVoid(scope, old_inst.src); + }, + .primitive => return self.analyzeInstPrimitive(scope, old_inst.cast(zir.Inst.Primitive).?), + .ref => return self.analyzeInstRef(scope, old_inst.cast(zir.Inst.Ref).?), + .fntype => return self.analyzeInstFnType(scope, old_inst.cast(zir.Inst.FnType).?), + .intcast => return self.analyzeInstIntCast(scope, old_inst.cast(zir.Inst.IntCast).?), + .bitcast => return self.analyzeInstBitCast(scope, old_inst.cast(zir.Inst.BitCast).?), + .elemptr => return self.analyzeInstElemPtr(scope, old_inst.cast(zir.Inst.ElemPtr).?), + .add => return self.analyzeInstAdd(scope, old_inst.cast(zir.Inst.Add).?), + .cmp => return self.analyzeInstCmp(scope, old_inst.cast(zir.Inst.Cmp).?), + .condbr => return self.analyzeInstCondBr(scope, old_inst.cast(zir.Inst.CondBr).?), + .isnull => return self.analyzeInstIsNull(scope, old_inst.cast(zir.Inst.IsNull).?), + .isnonnull => return self.analyzeInstIsNonNull(scope, old_inst.cast(zir.Inst.IsNonNull).?), + } +} + +fn analyzeInstBreakpoint(self: *Module, scope: *Scope, inst: *zir.Inst.Breakpoint) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, inst.base.src); + return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, Inst.Args(Inst.Breakpoint){}); +} + +fn analyzeInstRef(self: *Module, scope: *Scope, inst: *zir.Inst.Ref) InnerError!*Inst { + const decl = try self.resolveCompleteDecl(scope, inst.positionals.operand); + return self.analyzeDeclRef(scope, inst.base.src, decl); +} + +fn analyzeInstDeclRef(self: *Module, scope: *Scope, inst: *zir.Inst.DeclRef) InnerError!*Inst { + const decl_name = try self.resolveConstString(scope, inst.positionals.name); + // This will need to get more fleshed out when there are proper structs & namespaces. 
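+    // For now the lookup is a linear scan over the root ZIR module's decls,
+    // comparing names byte-for-byte, so each declref costs O(N) in module size.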
+ const zir_module = scope.namespace(); + for (zir_module.contents.module.decls) |src_decl| { + if (mem.eql(u8, src_decl.name, decl_name)) { + const decl = try self.resolveCompleteDecl(scope, src_decl); + return self.analyzeDeclRef(scope, inst.base.src, decl); + } + } + return self.fail(scope, inst.positionals.name.src, "use of undeclared identifier '{}'", .{decl_name}); +} + +fn analyzeDeclRef(self: *Module, scope: *Scope, src: usize, decl: *Decl) InnerError!*Inst { + const decl_tv = try decl.typedValue(); + const ty_payload = try scope.arena().create(Type.Payload.SingleConstPointer); + ty_payload.* = .{ .pointee_type = decl_tv.ty }; + const val_payload = try scope.arena().create(Value.Payload.DeclRef); + val_payload.* = .{ .decl = decl }; + return self.constInst(scope, src, .{ + .ty = Type.initPayload(&ty_payload.base), + .val = Value.initPayload(&val_payload.base), + }); +} + +fn analyzeInstCall(self: *Module, scope: *Scope, inst: *zir.Inst.Call) InnerError!*Inst { + const func = try self.resolveInst(scope, inst.positionals.func); + if (func.ty.zigTypeTag() != .Fn) + return self.fail(scope, inst.positionals.func.src, "type '{}' not a function", .{func.ty}); + + const cc = func.ty.fnCallingConvention(); + if (cc == .Naked) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "unable to call function with naked calling convention", + .{}, + ); + } + const call_params_len = inst.positionals.args.len; + const fn_params_len = func.ty.fnParamLen(); + if (func.ty.fnIsVarArgs()) { + if (call_params_len < fn_params_len) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "expected at least {} arguments, found {}", + .{ fn_params_len, call_params_len }, + ); + } + return self.fail(scope, inst.base.src, "TODO implement support for calling var args functions", .{}); + } else if (fn_params_len != call_params_len) { + // TODO add error note: declared here + return self.fail( + scope, + inst.positionals.func.src, + "expected {} arguments, found {}", + .{ fn_params_len, call_params_len }, + ); + } + + if (inst.kw_args.modifier == .compile_time) { + return self.fail(scope, inst.base.src, "TODO implement comptime function calls", .{}); + } + if (inst.kw_args.modifier != .auto) { + return self.fail(scope, inst.base.src, "TODO implement call with modifier {}", .{inst.kw_args.modifier}); + } + + // TODO handle function calls of generic functions + + const fn_param_types = try self.allocator.alloc(Type, fn_params_len); + defer self.allocator.free(fn_param_types); + func.ty.fnParamTypes(fn_param_types); + + const casted_args = try scope.arena().alloc(*Inst, fn_params_len); + for (inst.positionals.args) |src_arg, i| { + const uncasted_arg = try self.resolveInst(scope, src_arg); + casted_args[i] = try self.coerce(scope, fn_param_types[i], uncasted_arg); + } + + const b = try self.requireRuntimeBlock(scope, inst.base.src); + return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Call, Inst.Args(Inst.Call){ + .func = func, + .args = casted_args, + }); +} + +fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *zir.Inst.Fn) InnerError!*Inst { + const fn_type = try self.resolveType(scope, fn_inst.positionals.fn_type); + const new_func = try scope.arena().create(Fn); + new_func.* = .{ + .fn_type = fn_type, + .analysis = .{ .queued = fn_inst }, + }; + const fn_payload = try scope.arena().create(Value.Payload.Function); + fn_payload.* = .{ .func = new_func }; + return self.constInst(scope, 
fn_inst.base.src, .{ + .ty = fn_type, + .val = Value.initPayload(&fn_payload.base), + }); +} + +fn analyzeInstFnType(self: *Module, scope: *Scope, fntype: *zir.Inst.FnType) InnerError!*Inst { + const return_type = try self.resolveType(scope, fntype.positionals.return_type); + + if (return_type.zigTypeTag() == .NoReturn and + fntype.positionals.param_types.len == 0 and + fntype.kw_args.cc == .Unspecified) + { + return self.constType(scope, fntype.base.src, Type.initTag(.fn_noreturn_no_args)); + } + + if (return_type.zigTypeTag() == .NoReturn and + fntype.positionals.param_types.len == 0 and + fntype.kw_args.cc == .Naked) + { + return self.constType(scope, fntype.base.src, Type.initTag(.fn_naked_noreturn_no_args)); + } + + if (return_type.zigTypeTag() == .Void and + fntype.positionals.param_types.len == 0 and + fntype.kw_args.cc == .C) + { + return self.constType(scope, fntype.base.src, Type.initTag(.fn_ccc_void_no_args)); + } + + return self.fail(scope, fntype.base.src, "TODO implement fntype instruction more", .{}); +} + +fn analyzeInstPrimitive(self: *Module, scope: *Scope, primitive: *zir.Inst.Primitive) InnerError!*Inst { + return self.constType(scope, primitive.base.src, primitive.positionals.tag.toType()); +} + +fn analyzeInstAs(self: *Module, scope: *Scope, as: *zir.Inst.As) InnerError!*Inst { + const dest_type = try self.resolveType(scope, as.positionals.dest_type); + const new_inst = try self.resolveInst(scope, as.positionals.value); + return self.coerce(scope, dest_type, new_inst); +} + +fn analyzeInstPtrToInt(self: *Module, scope: *Scope, ptrtoint: *zir.Inst.PtrToInt) InnerError!*Inst { + const ptr = try self.resolveInst(scope, ptrtoint.positionals.ptr); + if (ptr.ty.zigTypeTag() != .Pointer) { + return self.fail(scope, ptrtoint.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}); + } + // TODO handle known-pointer-address + const b = try self.requireRuntimeBlock(scope, ptrtoint.base.src); + const ty = Type.initTag(.usize); + return self.addNewInstArgs(b, ptrtoint.base.src, ty, Inst.PtrToInt, Inst.Args(Inst.PtrToInt){ .ptr = ptr }); +} + +fn analyzeInstFieldPtr(self: *Module, scope: *Scope, fieldptr: *zir.Inst.FieldPtr) InnerError!*Inst { + const object_ptr = try self.resolveInst(scope, fieldptr.positionals.object_ptr); + const field_name = try self.resolveConstString(scope, fieldptr.positionals.field_name); + + const elem_ty = switch (object_ptr.ty.zigTypeTag()) { + .Pointer => object_ptr.ty.elemType(), + else => return self.fail(scope, fieldptr.positionals.object_ptr.src, "expected pointer, found '{}'", .{object_ptr.ty}), + }; + switch (elem_ty.zigTypeTag()) { + .Array => { + if (mem.eql(u8, field_name, "len")) { + const len_payload = try scope.arena().create(Value.Payload.Int_u64); + len_payload.* = .{ .int = elem_ty.arrayLen() }; + + const ref_payload = try scope.arena().create(Value.Payload.RefVal); + ref_payload.* = .{ .val = Value.initPayload(&len_payload.base) }; + + return self.constInst(scope, fieldptr.base.src, .{ + .ty = Type.initTag(.single_const_pointer_to_comptime_int), + .val = Value.initPayload(&ref_payload.base), + }); + } else { + return self.fail( + scope, + fieldptr.positionals.field_name.src, + "no member named '{}' in '{}'", + .{ field_name, elem_ty }, + ); + } + }, + else => return self.fail(scope, fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}), + } +} + +fn analyzeInstIntCast(self: *Module, scope: *Scope, intcast: *zir.Inst.IntCast) InnerError!*Inst { + const dest_type = try self.resolveType(scope, 
intcast.positionals.dest_type); + const new_inst = try self.resolveInst(scope, intcast.positionals.value); + + const dest_is_comptime_int = switch (dest_type.zigTypeTag()) { + .ComptimeInt => true, + .Int => false, + else => return self.fail( + scope, + intcast.positionals.dest_type.src, + "expected integer type, found '{}'", + .{ + dest_type, + }, + ), + }; + + switch (new_inst.ty.zigTypeTag()) { + .ComptimeInt, .Int => {}, + else => return self.fail( + scope, + intcast.positionals.value.src, + "expected integer type, found '{}'", + .{new_inst.ty}, + ), + } + + if (dest_is_comptime_int or new_inst.value() != null) { + return self.coerce(scope, dest_type, new_inst); + } + + return self.fail(scope, intcast.base.src, "TODO implement analyze widen or shorten int", .{}); +} + +fn analyzeInstBitCast(self: *Module, scope: *Scope, inst: *zir.Inst.BitCast) InnerError!*Inst { + const dest_type = try self.resolveType(scope, inst.positionals.dest_type); + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.bitcast(scope, dest_type, operand); +} + +fn analyzeInstElemPtr(self: *Module, scope: *Scope, inst: *zir.Inst.ElemPtr) InnerError!*Inst { + const array_ptr = try self.resolveInst(scope, inst.positionals.array_ptr); + const uncasted_index = try self.resolveInst(scope, inst.positionals.index); + const elem_index = try self.coerce(scope, Type.initTag(.usize), uncasted_index); + + if (array_ptr.ty.isSinglePointer() and array_ptr.ty.elemType().zigTypeTag() == .Array) { + if (array_ptr.value()) |array_ptr_val| { + if (elem_index.value()) |index_val| { + // Both array pointer and index are compile-time known. + const index_u64 = index_val.toUnsignedInt(); + // @intCast here because it would have been impossible to construct a value that + // required a larger index. + const elem_ptr = try array_ptr_val.elemPtr(scope.arena(), @intCast(usize, index_u64)); + + const type_payload = try scope.arena().create(Type.Payload.SingleConstPointer); + type_payload.* = .{ .pointee_type = array_ptr.ty.elemType().elemType() }; + + return self.constInst(scope, inst.base.src, .{ + .ty = Type.initPayload(&type_payload.base), + .val = elem_ptr, + }); + } + } + } + + return self.fail(scope, inst.base.src, "TODO implement more analyze elemptr", .{}); +} + +fn analyzeInstAdd(self: *Module, scope: *Scope, inst: *zir.Inst.Add) InnerError!*Inst { + const lhs = try self.resolveInst(scope, inst.positionals.lhs); + const rhs = try self.resolveInst(scope, inst.positionals.rhs); + + if (lhs.ty.zigTypeTag() == .Int and rhs.ty.zigTypeTag() == .Int) { + if (lhs.value()) |lhs_val| { + if (rhs.value()) |rhs_val| { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. 
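+                // A possible fast path (sketch only, not implemented): if both values
+                // fit in u64, native wrapping math could avoid the limb allocation:
+                //
+                //     var sum: u64 = undefined;
+                //     if (!@addWithOverflow(u64, lhs_u64, rhs_u64, &sum))
+                //         return self.constIntUnsigned(scope, inst.base.src, lhs.ty, sum);
+                //
+                // (`lhs_u64`/`rhs_u64` would come from a hypothetical checked narrowing
+                // of the values; only on overflow would we fall through to BigInt below.)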
+ var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const lhs_bigint = lhs_val.toBigInt(&lhs_space); + const rhs_bigint = rhs_val.toBigInt(&rhs_space); + const limbs = try scope.arena().alloc( + std.math.big.Limb, + std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len) + 1, + ); + var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + result_bigint.add(lhs_bigint, rhs_bigint); + const result_limbs = result_bigint.limbs[0..result_bigint.len]; + + if (!lhs.ty.eql(rhs.ty)) { + return self.fail(scope, inst.base.src, "TODO implement peer type resolution", .{}); + } + + const val_payload = if (result_bigint.positive) blk: { + const val_payload = try scope.arena().create(Value.Payload.IntBigPositive); + val_payload.* = .{ .limbs = result_limbs }; + break :blk &val_payload.base; + } else blk: { + const val_payload = try scope.arena().create(Value.Payload.IntBigNegative); + val_payload.* = .{ .limbs = result_limbs }; + break :blk &val_payload.base; + }; + + return self.constInst(scope, inst.base.src, .{ + .ty = lhs.ty, + .val = Value.initPayload(val_payload), + }); + } + } + } + + return self.fail(scope, inst.base.src, "TODO implement more analyze add", .{}); +} + +fn analyzeInstDeref(self: *Module, scope: *Scope, deref: *zir.Inst.Deref) InnerError!*Inst { + const ptr = try self.resolveInst(scope, deref.positionals.ptr); + return self.analyzeDeref(scope, deref.base.src, ptr, deref.positionals.ptr.src); +} + +fn analyzeDeref(self: *Module, scope: *Scope, src: usize, ptr: *Inst, ptr_src: usize) InnerError!*Inst { + const elem_ty = switch (ptr.ty.zigTypeTag()) { + .Pointer => ptr.ty.elemType(), + else => return self.fail(scope, ptr_src, "expected pointer, found '{}'", .{ptr.ty}), + }; + if (ptr.value()) |val| { + return self.constInst(scope, src, .{ + .ty = elem_ty, + .val = try val.pointerDeref(scope.arena()), + }); + } + + return self.fail(scope, src, "TODO implement runtime deref", .{}); +} + +fn analyzeInstAsm(self: *Module, scope: *Scope, assembly: *zir.Inst.Asm) InnerError!*Inst { + const return_type = try self.resolveType(scope, assembly.positionals.return_type); + const asm_source = try self.resolveConstString(scope, assembly.positionals.asm_source); + const output = if (assembly.kw_args.output) |o| try self.resolveConstString(scope, o) else null; + + const inputs = try scope.arena().alloc([]const u8, assembly.kw_args.inputs.len); + const clobbers = try scope.arena().alloc([]const u8, assembly.kw_args.clobbers.len); + const args = try scope.arena().alloc(*Inst, assembly.kw_args.args.len); + + for (inputs) |*elem, i| { + elem.* = try self.resolveConstString(scope, assembly.kw_args.inputs[i]); + } + for (clobbers) |*elem, i| { + elem.* = try self.resolveConstString(scope, assembly.kw_args.clobbers[i]); + } + for (args) |*elem, i| { + const arg = try self.resolveInst(scope, assembly.kw_args.args[i]); + elem.* = try self.coerce(scope, Type.initTag(.usize), arg); + } + + const b = try self.requireRuntimeBlock(scope, assembly.base.src); + return self.addNewInstArgs(b, assembly.base.src, return_type, Inst.Assembly, Inst.Args(Inst.Assembly){ + .asm_source = asm_source, + .is_volatile = assembly.kw_args.@"volatile", + .output = output, + .inputs = inputs, + .clobbers = clobbers, + .args = args, + }); +} + +fn analyzeInstCmp(self: *Module, scope: *Scope, inst: *zir.Inst.Cmp) InnerError!*Inst { + const lhs = try self.resolveInst(scope, inst.positionals.lhs); + const rhs = try self.resolveInst(scope, 
inst.positionals.rhs); + const op = inst.positionals.op; + + const is_equality_cmp = switch (op) { + .eq, .neq => true, + else => false, + }; + const lhs_ty_tag = lhs.ty.zigTypeTag(); + const rhs_ty_tag = rhs.ty.zigTypeTag(); + if (is_equality_cmp and lhs_ty_tag == .Null and rhs_ty_tag == .Null) { + // null == null, null != null + return self.constBool(scope, inst.base.src, op == .eq); + } else if (is_equality_cmp and + ((lhs_ty_tag == .Null and rhs_ty_tag == .Optional) or + rhs_ty_tag == .Null and lhs_ty_tag == .Optional)) + { + // comparing null with optionals + const opt_operand = if (lhs_ty_tag == .Optional) lhs else rhs; + if (opt_operand.value()) |opt_val| { + const is_null = opt_val.isNull(); + return self.constBool(scope, inst.base.src, if (op == .eq) is_null else !is_null); + } + const b = try self.requireRuntimeBlock(scope, inst.base.src); + switch (op) { + .eq => return self.addNewInstArgs( + b, + inst.base.src, + Type.initTag(.bool), + Inst.IsNull, + Inst.Args(Inst.IsNull){ .operand = opt_operand }, + ), + .neq => return self.addNewInstArgs( + b, + inst.base.src, + Type.initTag(.bool), + Inst.IsNonNull, + Inst.Args(Inst.IsNonNull){ .operand = opt_operand }, + ), + else => unreachable, + } + } else if (is_equality_cmp and + ((lhs_ty_tag == .Null and rhs.ty.isCPtr()) or (rhs_ty_tag == .Null and lhs.ty.isCPtr()))) + { + return self.fail(scope, inst.base.src, "TODO implement C pointer cmp", .{}); + } else if (lhs_ty_tag == .Null or rhs_ty_tag == .Null) { + const non_null_type = if (lhs_ty_tag == .Null) rhs.ty else lhs.ty; + return self.fail(scope, inst.base.src, "comparison of '{}' with null", .{non_null_type}); + } else if (is_equality_cmp and + ((lhs_ty_tag == .EnumLiteral and rhs_ty_tag == .Union) or + (rhs_ty_tag == .EnumLiteral and lhs_ty_tag == .Union))) + { + return self.fail(scope, inst.base.src, "TODO implement equality comparison between a union's tag value and an enum literal", .{}); + } else if (lhs_ty_tag == .ErrorSet and rhs_ty_tag == .ErrorSet) { + if (!is_equality_cmp) { + return self.fail(scope, inst.base.src, "{} operator not allowed for errors", .{@tagName(op)}); + } + return self.fail(scope, inst.base.src, "TODO implement equality comparison between errors", .{}); + } else if (lhs.ty.isNumeric() and rhs.ty.isNumeric()) { + // This operation allows any combination of integer and float types, regardless of the + // signed-ness, comptime-ness, and bit-width. So peer type resolution is incorrect for + // numeric types. 
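+        // E.g. comparing `@as(u64, 5)` with `@as(i64, -1)`: coercing both operands to
+        // either operand type would wrap the -1, so cmpNumeric computes a wide-enough
+        // signed type instead and compares the mathematical values.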
+ return self.cmpNumeric(scope, inst.base.src, lhs, rhs, op); + } + return self.fail(scope, inst.base.src, "TODO implement more cmp analysis", .{}); +} + +fn analyzeInstIsNull(self: *Module, scope: *Scope, inst: *zir.Inst.IsNull) InnerError!*Inst { + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.analyzeIsNull(scope, inst.base.src, operand, true); +} + +fn analyzeInstIsNonNull(self: *Module, scope: *Scope, inst: *zir.Inst.IsNonNull) InnerError!*Inst { + const operand = try self.resolveInst(scope, inst.positionals.operand); + return self.analyzeIsNull(scope, inst.base.src, operand, false); +} + +fn analyzeInstCondBr(self: *Module, scope: *Scope, inst: *zir.Inst.CondBr) InnerError!*Inst { + const uncasted_cond = try self.resolveInst(scope, inst.positionals.condition); + const cond = try self.coerce(scope, Type.initTag(.bool), uncasted_cond); + + if (try self.resolveDefinedValue(scope, cond)) |cond_val| { + const body = if (cond_val.toBool()) &inst.positionals.true_body else &inst.positionals.false_body; + try self.analyzeBody(scope, body.*); + return self.constVoid(scope, inst.base.src); + } + + const parent_block = try self.requireRuntimeBlock(scope, inst.base.src); + + var true_block: Scope.Block = .{ + .func = parent_block.func, + .decl = parent_block.decl, + .instructions = .{}, + .arena = parent_block.arena, + }; + defer true_block.instructions.deinit(self.allocator); + try self.analyzeBody(&true_block.base, inst.positionals.true_body); + + var false_block: Scope.Block = .{ + .func = parent_block.func, + .decl = parent_block.decl, + .instructions = .{}, + .arena = parent_block.arena, + }; + defer false_block.instructions.deinit(self.allocator); + try self.analyzeBody(&false_block.base, inst.positionals.false_body); + + return self.addNewInstArgs(parent_block, inst.base.src, Type.initTag(.void), Inst.CondBr, Inst.Args(Inst.CondBr){ + .condition = cond, + .true_body = .{ .instructions = try scope.arena().dupe(*Inst, true_block.instructions.items) }, + .false_body = .{ .instructions = try scope.arena().dupe(*Inst, false_block.instructions.items) }, + }); +} + +fn wantSafety(self: *Module, scope: *Scope) bool { + return switch (self.optimize_mode) { + .Debug => true, + .ReleaseSafe => true, + .ReleaseFast => false, + .ReleaseSmall => false, + }; +} + +fn analyzeInstUnreachable(self: *Module, scope: *Scope, unreach: *zir.Inst.Unreachable) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, unreach.base.src); + if (self.wantSafety(scope)) { + // TODO Once we have a panic function to call, call it here instead of this. 
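+        // For now a safety-checked `unreachable` traps instead: the breakpoint
+        // instruction lowers to int3 (0xcc) on x86_64 (see genBreakpoint in codegen.zig).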
+ _ = try self.addNewInstArgs(b, unreach.base.src, Type.initTag(.void), Inst.Breakpoint, {}); + } + return self.addNewInstArgs(b, unreach.base.src, Type.initTag(.noreturn), Inst.Unreach, {}); +} + +fn analyzeInstRet(self: *Module, scope: *Scope, inst: *zir.Inst.Return) InnerError!*Inst { + const b = try self.requireRuntimeBlock(scope, inst.base.src); + return self.addNewInstArgs(b, inst.base.src, Type.initTag(.noreturn), Inst.Ret, {}); +} + +fn analyzeBody(self: *Module, scope: *Scope, body: zir.Module.Body) !void { + if (scope.cast(Scope.Block)) |b| { + const analysis = b.func.analysis.in_progress; + analysis.needed_inst_capacity += body.instructions.len; + try analysis.inst_table.ensureCapacity(analysis.needed_inst_capacity); + for (body.instructions) |src_inst| { + const new_inst = try self.analyzeInst(scope, src_inst); + analysis.inst_table.putAssumeCapacityNoClobber(src_inst, new_inst); + } + } else { + for (body.instructions) |src_inst| { + _ = try self.analyzeInst(scope, src_inst); + } + } +} + +fn analyzeIsNull( + self: *Module, + scope: *Scope, + src: usize, + operand: *Inst, + invert_logic: bool, +) InnerError!*Inst { + return self.fail(scope, src, "TODO implement analysis of isnull and isnotnull", .{}); +} + +/// Asserts that lhs and rhs types are both numeric. +fn cmpNumeric( + self: *Module, + scope: *Scope, + src: usize, + lhs: *Inst, + rhs: *Inst, + op: std.math.CompareOperator, +) !*Inst { + assert(lhs.ty.isNumeric()); + assert(rhs.ty.isNumeric()); + + const lhs_ty_tag = lhs.ty.zigTypeTag(); + const rhs_ty_tag = rhs.ty.zigTypeTag(); + + if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) { + if (lhs.ty.arrayLen() != rhs.ty.arrayLen()) { + return self.fail(scope, src, "vector length mismatch: {} and {}", .{ + lhs.ty.arrayLen(), + rhs.ty.arrayLen(), + }); + } + return self.fail(scope, src, "TODO implement support for vectors in cmpNumeric", .{}); + } else if (lhs_ty_tag == .Vector or rhs_ty_tag == .Vector) { + return self.fail(scope, src, "mixed scalar and vector operands to comparison operator: '{}' and '{}'", .{ + lhs.ty, + rhs.ty, + }); + } + + if (lhs.value()) |lhs_val| { + if (rhs.value()) |rhs_val| { + return self.constBool(scope, src, Value.compare(lhs_val, op, rhs_val)); + } + } + + // TODO handle comparisons against lazy zero values + // Some values can be compared against zero without being runtime known or without forcing + // a full resolution of their value, for example `@sizeOf(@Frame(function))` is known to + // always be nonzero, and we benefit from not forcing the full evaluation and stack frame layout + // of this function if we don't need to. + + // It must be a runtime comparison. + const b = try self.requireRuntimeBlock(scope, src); + // For floats, emit a float comparison instruction. + const lhs_is_float = switch (lhs_ty_tag) { + .Float, .ComptimeFloat => true, + else => false, + }; + const rhs_is_float = switch (rhs_ty_tag) { + .Float, .ComptimeFloat => true, + else => false, + }; + if (lhs_is_float and rhs_is_float) { + // Implicit cast the smaller one to the larger one. 
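+        // (A comptime_float operand adopts the other operand's type; otherwise the
+        // smaller float is widened, e.g. f32 is cast to f64.)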
+ const dest_type = x: { + if (lhs_ty_tag == .ComptimeFloat) { + break :x rhs.ty; + } else if (rhs_ty_tag == .ComptimeFloat) { + break :x lhs.ty; + } + if (lhs.ty.floatBits(self.target()) >= rhs.ty.floatBits(self.target())) { + break :x lhs.ty; + } else { + break :x rhs.ty; + } + }; + const casted_lhs = try self.coerce(scope, dest_type, lhs); + const casted_rhs = try self.coerce(scope, dest_type, rhs); + return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){ + .lhs = casted_lhs, + .rhs = casted_rhs, + .op = op, + }); + } + // For mixed unsigned integer sizes, implicit cast both operands to the larger integer. + // For mixed signed and unsigned integers, implicit cast both operands to a signed + // integer with + 1 bit. + // For mixed floats and integers, extract the integer part from the float, cast that to + // a signed integer with mantissa bits + 1, and if there was any non-integral part of the float, + // add/subtract 1. + const lhs_is_signed = if (lhs.value()) |lhs_val| + lhs_val.compareWithZero(.lt) + else + (lhs.ty.isFloat() or lhs.ty.isSignedInt()); + const rhs_is_signed = if (rhs.value()) |rhs_val| + rhs_val.compareWithZero(.lt) + else + (rhs.ty.isFloat() or rhs.ty.isSignedInt()); + const dest_int_is_signed = lhs_is_signed or rhs_is_signed; + + var dest_float_type: ?Type = null; + + var lhs_bits: usize = undefined; + if (lhs.value()) |lhs_val| { + if (lhs_val.isUndef()) + return self.constUndef(scope, src, Type.initTag(.bool)); + const is_unsigned = if (lhs_is_float) x: { + var bigint_space: Value.BigIntSpace = undefined; + var bigint = try lhs_val.toBigInt(&bigint_space).toManaged(self.allocator); + defer bigint.deinit(); + const zcmp = lhs_val.orderAgainstZero(); + if (lhs_val.floatHasFraction()) { + switch (op) { + .eq => return self.constBool(scope, src, false), + .neq => return self.constBool(scope, src, true), + else => {}, + } + if (zcmp == .lt) { + try bigint.addScalar(bigint.toConst(), -1); + } else { + try bigint.addScalar(bigint.toConst(), 1); + } + } + lhs_bits = bigint.toConst().bitCountTwosComp(); + break :x (zcmp != .lt); + } else x: { + lhs_bits = lhs_val.intBitCountTwosComp(); + break :x (lhs_val.orderAgainstZero() != .lt); + }; + lhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); + } else if (lhs_is_float) { + dest_float_type = lhs.ty; + } else { + const int_info = lhs.ty.intInfo(self.target()); + lhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed); + } + + var rhs_bits: usize = undefined; + if (rhs.value()) |rhs_val| { + if (rhs_val.isUndef()) + return self.constUndef(scope, src, Type.initTag(.bool)); + const is_unsigned = if (rhs_is_float) x: { + var bigint_space: Value.BigIntSpace = undefined; + var bigint = try rhs_val.toBigInt(&bigint_space).toManaged(self.allocator); + defer bigint.deinit(); + const zcmp = rhs_val.orderAgainstZero(); + if (rhs_val.floatHasFraction()) { + switch (op) { + .eq => return self.constBool(scope, src, false), + .neq => return self.constBool(scope, src, true), + else => {}, + } + if (zcmp == .lt) { + try bigint.addScalar(bigint.toConst(), -1); + } else { + try bigint.addScalar(bigint.toConst(), 1); + } + } + rhs_bits = bigint.toConst().bitCountTwosComp(); + break :x (zcmp != .lt); + } else x: { + rhs_bits = rhs_val.intBitCountTwosComp(); + break :x (rhs_val.orderAgainstZero() != .lt); + }; + rhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); + } else if (rhs_is_float) { + dest_float_type = rhs.ty; + } else { + const int_info = rhs.ty.intInfo(self.target()); + 
rhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed);
+    }
+
+    const dest_type = if (dest_float_type) |ft| ft else blk: {
+        const max_bits = std.math.max(lhs_bits, rhs_bits);
+        const casted_bits = std.math.cast(u16, max_bits) catch |err| switch (err) {
+            error.Overflow => return self.fail(scope, src, "{} exceeds maximum integer bit count", .{max_bits}),
+        };
+        break :blk try self.makeIntType(scope, dest_int_is_signed, casted_bits);
+    };
+    const casted_lhs = try self.coerce(scope, dest_type, lhs);
+    const casted_rhs = try self.coerce(scope, dest_type, rhs);
+
+    return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){
+        .lhs = casted_lhs,
+        .rhs = casted_rhs,
+        .op = op,
+    });
+}
+
+fn makeIntType(self: *Module, scope: *Scope, signed: bool, bits: u16) !Type {
+    if (signed) {
+        const int_payload = try scope.arena().create(Type.Payload.IntSigned);
+        int_payload.* = .{ .bits = bits };
+        return Type.initPayload(&int_payload.base);
+    } else {
+        const int_payload = try scope.arena().create(Type.Payload.IntUnsigned);
+        int_payload.* = .{ .bits = bits };
+        return Type.initPayload(&int_payload.base);
+    }
+}
+
+fn coerce(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
+    // If the types are the same, we can return the operand.
+    if (dest_type.eql(inst.ty))
+        return inst;
+
+    const in_memory_result = coerceInMemoryAllowed(dest_type, inst.ty);
+    if (in_memory_result == .ok) {
+        return self.bitcast(scope, dest_type, inst);
+    }
+
+    // *[N]T to []T
+    if (inst.ty.isSinglePointer() and dest_type.isSlice() and
+        (!inst.ty.pointerIsConst() or dest_type.pointerIsConst()))
+    {
+        const array_type = inst.ty.elemType();
+        const dst_elem_type = dest_type.elemType();
+        if (array_type.zigTypeTag() == .Array and
+            coerceInMemoryAllowed(dst_elem_type, array_type.elemType()) == .ok)
+        {
+            return self.coerceArrayPtrToSlice(scope, dest_type, inst);
+        }
+    }
+
+    // comptime_int to fixed-width integer
+    if (inst.ty.zigTypeTag() == .ComptimeInt and dest_type.zigTypeTag() == .Int) {
+        // The representation is already correct; we only need to make sure it fits in the destination type.
+        const val = inst.value().?; // comptime_int always has comptime known value
+        if (!val.intFitsInType(dest_type, self.target())) {
+            return self.fail(scope, inst.src, "type {} cannot represent integer value {}", .{ inst.ty, val });
+        }
+        return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val });
+    }
+
+    // integer widening
+    if (inst.ty.zigTypeTag() == .Int and dest_type.zigTypeTag() == .Int) {
+        const src_info = inst.ty.intInfo(self.target());
+        const dst_info = dest_type.intInfo(self.target());
+        if (src_info.signed == dst_info.signed and dst_info.bits >= src_info.bits) {
+            if (inst.value()) |val| {
+                return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val });
+            } else {
+                return self.fail(scope, inst.src, "TODO implement runtime integer widening", .{});
+            }
+        } else {
+            return self.fail(scope, inst.src, "TODO implement more int widening {} to {}", .{ inst.ty, dest_type });
+        }
+    }
+
+    return self.fail(scope, inst.src, "TODO implement type coercion from {} to {}", .{ inst.ty, dest_type });
+}
+
+fn bitcast(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst {
+    if (inst.value()) |val| {
+        // Keep the comptime Value representation; take the new type.
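+        // No machine code is needed for a comptime-known operand: the Value is
+        // reused as-is and only the Type is swapped out.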
+ return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); + } + // TODO validate the type size and other compile errors + const b = try self.requireRuntimeBlock(scope, inst.src); + return self.addNewInstArgs(b, inst.src, dest_type, Inst.BitCast, Inst.Args(Inst.BitCast){ .operand = inst }); +} + +fn coerceArrayPtrToSlice(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst { + if (inst.value()) |val| { + // The comptime Value representation is compatible with both types. + return self.constInst(scope, inst.src, .{ .ty = dest_type, .val = val }); + } + return self.fail(scope, inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{}); +} + +fn fail(self: *Module, scope: *Scope, src: usize, comptime format: []const u8, args: var) InnerError { + @setCold(true); + const err_msg = try ErrorMsg.create(self.allocator, src, format, args); + return self.failWithOwnedErrorMsg(scope, src, err_msg); +} + +fn failWithOwnedErrorMsg(self: *Module, scope: *Scope, src: usize, err_msg: *ErrorMsg) InnerError { + { + errdefer err_msg.destroy(self.allocator); + try self.failed_decls.ensureCapacity(self.failed_decls.size + 1); + try self.failed_files.ensureCapacity(self.failed_files.size + 1); + } + switch (scope.tag) { + .decl => { + const decl = scope.cast(Scope.DeclAnalysis).?.decl; + switch (decl.analysis) { + .initial_in_progress => decl.analysis = .initial_sema_failure, + .repeat_in_progress => decl.analysis = .repeat_sema_failure, + else => unreachable, + } + self.failed_decls.putAssumeCapacityNoClobber(decl, err_msg); + }, + .block => { + const block = scope.cast(Scope.Block).?; + block.func.analysis = .sema_failure; + self.failed_decls.putAssumeCapacityNoClobber(block.decl, err_msg); + }, + .zir_module => { + const zir_module = scope.cast(Scope.ZIRModule).?; + zir_module.status = .loaded_sema_failure; + self.failed_files.putAssumeCapacityNoClobber(zir_module, err_msg); + }, + } + return error.AnalysisFail; +} + +const InMemoryCoercionResult = enum { + ok, + no_match, +}; + +fn coerceInMemoryAllowed(dest_type: Type, src_type: Type) InMemoryCoercionResult { + if (dest_type.eql(src_type)) + return .ok; + + // TODO: implement more of this function + + return .no_match; +} + +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, + + pub fn create(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !*ErrorMsg { + const self = try allocator.create(ErrorMsg); + errdefer allocator.destroy(self); + self.* = try init(allocator, byte_offset, format, args); + return self; + } + + /// Assumes the ErrorMsg struct and msg were both allocated with allocator. + pub fn destroy(self: *ErrorMsg, allocator: *Allocator) void { + self.deinit(allocator); + allocator.destroy(self); + } + + pub fn init(allocator: *Allocator, byte_offset: usize, comptime format: []const u8, args: var) !ErrorMsg { + return ErrorMsg{ + .byte_offset = byte_offset, + .msg = try std.fmt.allocPrint(allocator, format, args), + }; + } + + pub fn deinit(self: *ErrorMsg, allocator: *Allocator) void { + allocator.free(self.msg); + self.* = undefined; + } +}; diff --git a/src-self-hosted/Package.zig b/src-self-hosted/Package.zig new file mode 100644 index 0000000000..c70b3b6bd0 --- /dev/null +++ b/src-self-hosted/Package.zig @@ -0,0 +1,53 @@ +pub const Table = std.StringHashMap(*Package); + +root_src_dir: std.fs.Dir, +/// Relative to `root_src_dir`. +root_src_path: []const u8, +table: Table, + +/// No references to `root_src_dir` and `root_src_path` are kept. 
+pub fn create(
+    allocator: *mem.Allocator,
+    base_dir: std.fs.Dir,
+    /// Relative to `base_dir`.
+    root_src_dir: []const u8,
+    /// Relative to `root_src_dir`.
+    root_src_path: []const u8,
+) !*Package {
+    const ptr = try allocator.create(Package);
+    errdefer allocator.destroy(ptr);
+    const root_src_path_dupe = try mem.dupe(allocator, u8, root_src_path);
+    errdefer allocator.free(root_src_path_dupe);
+    ptr.* = .{
+        .root_src_dir = try base_dir.openDir(root_src_dir, .{}),
+        .root_src_path = root_src_path_dupe,
+        .table = Table.init(allocator),
+    };
+    return ptr;
+}
+
+pub fn destroy(self: *Package) void {
+    const allocator = self.table.allocator;
+    self.root_src_dir.close();
+    allocator.free(self.root_src_path);
+    {
+        var it = self.table.iterator();
+        while (it.next()) |kv| {
+            allocator.free(kv.key);
+        }
+    }
+    self.table.deinit();
+    allocator.destroy(self);
+}
+
+pub fn add(self: *Package, name: []const u8, package: *Package) !void {
+    const name_dupe = try mem.dupe(self.table.allocator, u8, name);
+    errdefer self.table.allocator.free(name_dupe);
+    const entry = try self.table.put(name_dupe, package);
+    assert(entry == null);
+}
+
+const std = @import("std");
+const mem = std.mem;
+const assert = std.debug.assert;
+const Package = @This();
diff --git a/src-self-hosted/TypedValue.zig b/src-self-hosted/TypedValue.zig
new file mode 100644
index 0000000000..83a8f3c09f
--- /dev/null
+++ b/src-self-hosted/TypedValue.zig
@@ -0,0 +1,22 @@
+const std = @import("std");
+const Type = @import("type.zig").Type;
+const Value = @import("value.zig").Value;
+const Allocator = std.mem.Allocator;
+const TypedValue = @This();
+
+ty: Type,
+val: Value,
+
+/// Memory management for TypedValue. The main purpose of this type
+/// is to be small and have a deinit() function to free associated resources.
+pub const Managed = struct {
+    /// The analyzed type and value, which may reference memory owned by `arena` below.
+    typed_value: TypedValue,
+    /// If this is `null` then there is no memory management needed.
+    arena: ?*std.heap.ArenaAllocator.State = null,
+
+    pub fn deinit(self: *Managed, allocator: *Allocator) void {
+        if (self.arena) |a| a.promote(allocator).deinit();
+        self.* = undefined;
+    }
+};
diff --git a/src-self-hosted/c.zig b/src-self-hosted/c.zig
deleted file mode 100644
index ae9a886d1b..0000000000
--- a/src-self-hosted/c.zig
+++ /dev/null
@@ -1,7 +0,0 @@
-pub usingnamespace @cImport({
-    @cDefine("__STDC_CONSTANT_MACROS", "");
-    @cDefine("__STDC_LIMIT_MACROS", "");
-    @cInclude("inttypes.h");
-    @cInclude("config.h");
-    @cInclude("zig_llvm.h");
-});
diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig
index 675b8faad2..08a7b29ca3 100644
--- a/src-self-hosted/codegen.zig
+++ b/src-self-hosted/codegen.zig
@@ -4,64 +4,155 @@ const assert = std.debug.assert;
 const ir = @import("ir.zig");
 const Type = @import("type.zig").Type;
 const Value = @import("value.zig").Value;
+const TypedValue = @import("TypedValue.zig");
+const link = @import("link.zig");
+const Module = @import("Module.zig");
+const ErrorMsg = Module.ErrorMsg;
 const Target = std.Target;
+const Allocator = mem.Allocator;
 
-pub const ErrorMsg = struct {
-    byte_offset: usize,
-    msg: []const u8,
+pub const Result = union(enum) {
+    /// The `code` parameter passed to `generateSymbol` has the value appended.
+    appended: void,
+    /// The value is available externally, `code` is unused.
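+    /// (E.g. a string literal's bytes, which already live in the Decl's arena.)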
+ externally_managed: []const u8, + fail: *Module.ErrorMsg, }; -pub const Symbol = struct { - errors: []ErrorMsg, - - pub fn deinit(self: *Symbol, allocator: *mem.Allocator) void { - for (self.errors) |err| { - allocator.free(err.msg); - } - allocator.free(self.errors); - self.* = undefined; - } -}; - -pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !Symbol { +pub fn generateSymbol( + bin_file: *link.ElfFile, + src: usize, + typed_value: TypedValue, + code: *std.ArrayList(u8), +) error{ + OutOfMemory, + /// A Decl that this symbol depends on had a semantic analysis failure. + AnalysisFail, +}!Result { switch (typed_value.ty.zigTypeTag()) { .Fn => { - const index = typed_value.val.cast(Value.Payload.Function).?.index; - const module_fn = module.fns[index]; + const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; var function = Function{ - .module = &module, - .mod_fn = &module_fn, + .target = &bin_file.options.target, + .bin_file = bin_file, + .mod_fn = module_fn, .code = code, - .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator), - .errors = std.ArrayList(ErrorMsg).init(code.allocator), + .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator), + .err_msg = null, }; defer function.inst_table.deinit(); - defer function.errors.deinit(); - for (module_fn.body.instructions) |inst| { + for (module_fn.analysis.success.instructions) |inst| { const new_inst = function.genFuncInst(inst) catch |err| switch (err) { - error.CodegenFail => { - assert(function.errors.items.len != 0); - break; - }, + error.CodegenFail => return Result{ .fail = function.err_msg.? }, else => |e| return e, }; try function.inst_table.putNoClobber(inst, new_inst); } - return Symbol{ .errors = function.errors.toOwnedSlice() }; + if (function.err_msg) |em| { + return Result{ .fail = em }; + } else { + return Result{ .appended = {} }; + } + }, + .Array => { + if (typed_value.val.cast(Value.Payload.Bytes)) |payload| { + if (typed_value.ty.arraySentinel()) |sentinel| { + try code.ensureCapacity(code.items.len + payload.data.len + 1); + code.appendSliceAssumeCapacity(payload.data); + const prev_len = code.items.len; + switch (try generateSymbol(bin_file, src, .{ + .ty = typed_value.ty.elemType(), + .val = sentinel, + }, code)) { + .appended => return Result{ .appended = {} }, + .externally_managed => |slice| { + code.appendSliceAssumeCapacity(slice); + return Result{ .appended = {} }; + }, + .fail => |em| return Result{ .fail = em }, + } + } else { + return Result{ .externally_managed = payload.data }; + } + } + return Result{ + .fail = try ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for more kinds of arrays", + .{}, + ), + }; + }, + .Pointer => { + if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { + const decl = payload.decl; + if (decl.analysis != .complete) return error.AnalysisFail; + assert(decl.link.local_sym_index != 0); + // TODO handle the dependency of this symbol on the decl's vaddr. + // If the decl changes vaddr, then this symbol needs to get regenerated. 
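+                // The reference is emitted as the pointee's virtual address, byte for
+                // byte: 4 or 8 bytes depending on ptr_width, in target endianness.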
+ const vaddr = bin_file.local_symbols.items[decl.link.local_sym_index].st_value; + const endian = bin_file.options.target.cpu.arch.endian(); + switch (bin_file.ptr_width) { + .p32 => { + try code.resize(4); + mem.writeInt(u32, code.items[0..4], @intCast(u32, vaddr), endian); + }, + .p64 => { + try code.resize(8); + mem.writeInt(u64, code.items[0..8], vaddr, endian); + }, + } + return Result{ .appended = {} }; + } + return Result{ + .fail = try ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for pointer {}", + .{typed_value.val}, + ), + }; + }, + .Int => { + const info = typed_value.ty.intInfo(bin_file.options.target); + if (info.bits == 8 and !info.signed) { + const x = typed_value.val.toUnsignedInt(); + try code.append(@intCast(u8, x)); + return Result{ .appended = {} }; + } + return Result{ + .fail = try ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for int type '{}'", + .{typed_value.ty}, + ), + }; + }, + else => |t| { + return Result{ + .fail = try ErrorMsg.create( + bin_file.allocator, + src, + "TODO implement generateSymbol for type '{}'", + .{@tagName(t)}, + ), + }; }, - else => @panic("TODO implement generateSymbol for non-function types"), } } const Function = struct { - module: *const ir.Module, - mod_fn: *const ir.Module.Fn, + bin_file: *link.ElfFile, + target: *const std.Target, + mod_fn: *const Module.Fn, code: *std.ArrayList(u8), inst_table: std.AutoHashMap(*ir.Inst, MCValue), - errors: std.ArrayList(ErrorMsg), + err_msg: ?*ErrorMsg, const MCValue = union(enum) { none, @@ -73,11 +164,14 @@ const Function = struct { /// The value is in a target-specific register. The value can /// be @intToEnum casted to the respective Reg enum. register: usize, + /// The value is in memory at a hard-coded address. 
+ memory: u64, }; fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue { switch (inst.tag) { .breakpoint => return self.genBreakpoint(inst.src), + .call => return self.genCall(inst.cast(ir.Inst.Call).?), .unreach => return MCValue{ .unreach = {} }, .constant => unreachable, // excluded from function bodies .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?), @@ -92,54 +186,76 @@ const Function = struct { } fn genBreakpoint(self: *Function, src: usize) !MCValue { - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .i386, .x86_64 => { try self.code.append(0xcc); // int3 }, - else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.module.target.cpu.arch}), + else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.target.cpu.arch}), + } + return .none; + } + + fn genCall(self: *Function, inst: *ir.Inst.Call) !MCValue { + if (inst.args.func.cast(ir.Inst.Constant)) |func_inst| { + if (inst.args.args.len != 0) { + return self.fail(inst.base.src, "TODO implement call with more than 0 parameters", .{}); + } + + if (func_inst.val.cast(Value.Payload.Function)) |func_val| { + const func = func_val.func; + return self.fail(inst.base.src, "TODO implement calling function", .{}); + } else { + return self.fail(inst.base.src, "TODO implement calling weird function values", .{}); + } + } else { + return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{}); + } + + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}), } - return .unreach; } fn genRet(self: *Function, inst: *ir.Inst.Ret) !MCValue { - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .i386, .x86_64 => { try self.code.append(0xc3); // ret }, - else => return self.fail(inst.base.src, "TODO implement return for {}", .{self.module.target.cpu.arch}), + else => return self.fail(inst.base.src, "TODO implement return for {}", .{self.target.cpu.arch}), } return .unreach; } fn genCmp(self: *Function, inst: *ir.Inst.Cmp) !MCValue { - switch (self.module.target.cpu.arch) { - else => return self.fail(inst.base.src, "TODO implement cmp for {}", .{self.module.target.cpu.arch}), + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement cmp for {}", .{self.target.cpu.arch}), } } fn genCondBr(self: *Function, inst: *ir.Inst.CondBr) !MCValue { - switch (self.module.target.cpu.arch) { - else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.module.target.cpu.arch}), + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}), } } fn genIsNull(self: *Function, inst: *ir.Inst.IsNull) !MCValue { - switch (self.module.target.cpu.arch) { - else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.module.target.cpu.arch}), + switch (self.target.cpu.arch) { + else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.target.cpu.arch}), } } fn genIsNonNull(self: *Function, inst: *ir.Inst.IsNonNull) !MCValue { // Here you can specialize this instruction if it makes sense to, otherwise the default // will call genIsNull and invert the result. 
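+        // (A backend could, for example, emit the genIsNull sequence plus an extra
+        // instruction to invert the result, but none does yet, so this is all TODO.)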
- switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { else => return self.fail(inst.base.src, "TODO call genIsNull and invert the result ", .{}), } } fn genRelativeFwdJump(self: *Function, src: usize, amount: u32) !void { - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .i386, .x86_64 => { + // TODO x86 treats the operands as signed if (amount <= std.math.maxInt(u8)) { try self.code.resize(self.code.items.len + 2); self.code.items[self.code.items.len - 2] = 0xeb; @@ -151,13 +267,13 @@ const Function = struct { mem.writeIntLittle(u32, imm_ptr, amount); } }, - else => return self.fail(src, "TODO implement relative forward jump for {}", .{self.module.target.cpu.arch}), + else => return self.fail(src, "TODO implement relative forward jump for {}", .{self.target.cpu.arch}), } } fn genAsm(self: *Function, inst: *ir.Inst.Assembly) !MCValue { // TODO convert to inline function - switch (self.module.target.cpu.arch) { + switch (self.target.cpu.arch) { .arm => return self.genAsmArch(.arm, inst), .armeb => return self.genAsmArch(.armeb, inst), .aarch64 => return self.genAsmArch(.aarch64, inst), @@ -246,128 +362,182 @@ const Function = struct { } } - fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) !void { + fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { switch (arch) { - .x86_64 => switch (reg) { - .rax => switch (mcv) { - .none, .unreach => unreachable, - .immediate => |x| { - // Setting the eax register zeroes the upper part of rax, so if the number is small - // enough, that is preferable. - // Best case: zero - // 31 c0 xor eax,eax - if (x == 0) { - return self.code.appendSlice(&[_]u8{ 0x31, 0xc0 }); - } - // Next best case: set eax with 4 bytes - // b8 04 03 02 01 mov eax,0x01020304 - if (x <= std.math.maxInt(u32)) { - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xb8; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: set rax with 8 bytes - // 48 b8 08 07 06 05 04 03 02 01 movabs rax,0x0102030405060708 - try self.code.resize(self.code.items.len + 10); - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xb8; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - return; - }, - .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rax = embedded_in_code", .{}), - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rax = register", .{}), - }, - .rdx => switch (mcv) { - .none, .unreach => unreachable, - .immediate => |x| { - // Setting the edx register zeroes the upper part of rdx, so if the number is small - // enough, that is preferable. 
- // Best case: zero - // 31 d2 xor edx,edx - if (x == 0) { - return self.code.appendSlice(&[_]u8{ 0x31, 0xd2 }); - } - // Next best case: set edx with 4 bytes - // ba 04 03 02 01 mov edx,0x1020304 - if (x <= std.math.maxInt(u32)) { - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xba; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: set rdx with 8 bytes - // 48 ba 08 07 06 05 04 03 02 01 movabs rdx,0x0102030405060708 - try self.code.resize(self.code.items.len + 10); - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xba; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - return; - }, - .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}), - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}), - }, - .rdi => switch (mcv) { - .none, .unreach => unreachable, - .immediate => |x| { - // Setting the edi register zeroes the upper part of rdi, so if the number is small - // enough, that is preferable. - // Best case: zero - // 31 ff xor edi,edi - if (x == 0) { - return self.code.appendSlice(&[_]u8{ 0x31, 0xff }); - } - // Next best case: set edi with 4 bytes - // bf 04 03 02 01 mov edi,0x1020304 - if (x <= std.math.maxInt(u32)) { - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xbf; - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: set rdi with 8 bytes - // 48 bf 08 07 06 05 04 03 02 01 movabs rax,0x0102030405060708 - try self.code.resize(self.code.items.len + 10); - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xbf; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - return; - }, - .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = embedded_in_code", .{}), - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = register", .{}), - }, - .rsi => switch (mcv) { - .none, .unreach => unreachable, - .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = immediate", .{}), - .embedded_in_code => |code_offset| { - // Examples: - // lea rsi, [rip + 0x01020304] - // lea rsi, [rip - 7] - // f: 48 8d 35 04 03 02 01 lea rsi,[rip+0x1020304] # 102031a <_start+0x102031a> - // 16: 48 8d 35 f9 ff ff ff lea rsi,[rip+0xfffffffffffffff9] # 16 <_start+0x16> + .x86_64 => switch (mcv) { + .none, .unreach => unreachable, + .immediate => |x| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit + // register is the fastest way to zero a register. + if (x == 0) { + // The encoding for `xor r32, r32` is `0x31 /r`. + // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the + // ModR/M byte of the instruction contains a register operand and an r/m operand." // - // We need the offset from RIP in a signed i32 twos complement. - // The instruction is 7 bytes long and RIP points to the next instruction. 
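The three deleted branches above differ only in their opcode byte: 0xb8, 0xba, and 0xbf are just 0xB8 plus the register id, which is exactly the observation the generalized code below exploits. A minimal sketch of that generalization, assuming a plain `std.ArrayList(u8)` code buffer (the `encodeMovImm` helper is hypothetical, not this compiler's API, and the xor fast path for zero is omitted):

```zig
const std = @import("std");

/// Hypothetical sketch of a generalized `mov reg, imm` encoder.
/// `id4` is the 4-bit register id; bit 3 set means an extended (r8-r15) register.
fn encodeMovImm(code: *std.ArrayList(u8), id4: u4, imm: u64) !void {
    const id3 = @as(u8, id4 & 0b111);
    const extended = (id4 & 0b1000) != 0;
    if (imm <= std.math.maxInt(u32)) {
        // B8+rd imm32: the 32-bit form; the upper half of the 64-bit register is zeroed.
        if (extended) try code.append(0x41); // REX.B extends the opcode register field.
        try code.append(0xB8 | id3);
        var buf: [4]u8 = undefined;
        std.mem.writeIntLittle(u32, &buf, @intCast(u32, imm));
        try code.appendSlice(&buf);
    } else {
        // REX.W B8+rd imm64: the full 64-bit form.
        try code.append(0x48 | (if (extended) @as(u8, 0x01) else 0));
        try code.append(0xB8 | id3);
        var buf: [8]u8 = undefined;
        std.mem.writeIntLittle(u64, &buf, imm);
        try code.appendSlice(&buf);
    }
}
```

With id 7 and a 64-bit immediate this reproduces the deleted movabs encoding for rdi (0x48, 0xbf, imm64); with id 2 and a small immediate, the rdx (0xba, imm32) one.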
- try self.code.resize(self.code.items.len + 7); - const rip = self.code.items.len; - const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); - const offset = @intCast(i32, big_offset); - self.code.items[self.code.items.len - 7] = 0x48; - self.code.items[self.code.items.len - 6] = 0x8d; - self.code.items[self.code.items.len - 5] = 0x35; + .x86_64 => switch (mcv) { + .none, .unreach => unreachable, + .immediate => |x| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit + // register is the fastest way to zero a register. + if (x == 0) { + // The encoding for `xor r32, r32` is `0x31 /r`. + // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the + // ModR/M byte of the instruction contains a register operand and an r/m operand." // + // R/M bytes are composed of two bits for the mode, then three bits for the register, + // then three bits for the operand. Since we're zeroing a register, the two three-bit + // values will be identical, and the mode is three (the raw register value). + // + if (reg.isExtended()) { + // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since + // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB. + // Both R and B are set, as we're extending, in effect, the register bits *and* the operand. + // + // From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB. In this case, that's + // b01000101, or 0x45. + return self.code.appendSlice(&[_]u8{ + 0x45, + 0x31, + 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()), + }); + } else { + return self.code.appendSlice(&[_]u8{ + 0x31, + 0xC0 | (@as(u8, reg.id()) << 3) | reg.id(), + }); + } + } + if (x <= std.math.maxInt(u32)) { + // Next best case: if we set the lower four bytes, the upper four will be zeroed. + // + // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM. + if (reg.isExtended()) { + // Just as with XORing, we need a REX prefix. This time though, we only + // need the B bit set, as we're extending the opcode's register field, + // and there is no Mod R/M byte. + // + // Thus, we need b01000001, or 0x41. + try self.code.resize(self.code.items.len + 6); + self.code.items[self.code.items.len - 6] = 0x41; + } else { + try self.code.resize(self.code.items.len + 5); + } + self.code.items[self.code.items.len - 5] = 0xB8 | @as(u8, reg.id() & 0b111); const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(i32, imm_ptr, offset); + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); return; - }, - .register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}), + } + // Worst case: we need to load the 64-bit register with the IMM. GNU's assembler calls + // this `movabs`, though this is officially just a different variant of the plain `mov` + // instruction. + // + // This encoding is, in fact, the *same* as the one used for 32-bit loads. The only + // difference is that we set REX.W before the instruction, which extends the load to + // 64-bit and uses the full bit-width of the register. + // + // Since we always need a REX here, let's just check if we also need to set REX.B. + // + // In this case, the encoding of the REX byte is 0b0100100B + const REX = 0x48 | (if (reg.isExtended()) @as(u8, 0x01) else 0); + try self.code.resize(self.code.items.len + 10); + self.code.items[self.code.items.len - 10] = REX; + self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111); + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + }, + .embedded_in_code => |code_offset| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + // We need the offset from RIP in a signed i32 twos complement. + // The instruction is 7 bytes long and RIP points to the next instruction. + // + // 64-bit LEA is encoded as REX.W 8D /r. 
If the register is extended, the REX byte is modified, + but the operation size is unchanged. Since we're using a disp32, we want mode 0 and the lower three + bits set to five. + // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id. + try self.code.resize(self.code.items.len + 7); + const REX = 0x48 | if (reg.isExtended()) @as(u8, 1) else 0; + const rip = self.code.items.len; + const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); + const offset = @intCast(i32, big_offset); + self.code.items[self.code.items.len - 7] = REX; + self.code.items[self.code.items.len - 6] = 0x8D; + self.code.items[self.code.items.len - 5] = 0b101 | (@as(u8, reg.id() & 0b111) << 3); + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(i32, imm_ptr, offset); + }, + .register => |r| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + const src_reg = @intToEnum(Reg(arch), @intCast(u8, r)); + // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX. + // This is thus three bytes: REX 0x8B R/M. + // If the destination is extended, the R field must be 1. + // If the *source* is extended, the B field must be 1. + // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle + // three bits) contains the destination, and the R/M field (the lower three bits) contains the source. + const REX = 0x48 | (if (reg.isExtended()) @as(u8, 4) else 0) | (if (src_reg.isExtended()) @as(u8, 1) else 0); + const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, src_reg.id()); + try self.code.appendSlice(&[_]u8{ REX, 0x8B, R }); + }, + .memory => |x| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + if (x <= std.math.maxInt(u32)) { + // Moving from memory to a register is a variant of `8B /r`. + // Since we're using 64-bit moves, we require a REX. + // This variant also requires a SIB, as it would otherwise be RIP-relative. + // We want mode zero with the lower three bits set to four to indicate a SIB with no other displacement. + // The SIB must be 0x25, to indicate a disp32 with no scaled index. + // 0b00RRR100, where RRR is the lower three bits of the register ID. + // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. + try self.code.resize(self.code.items.len + 8); + const REX = 0x48 | if (reg.isExtended()) @as(u8, 1) else 0; + const r = 0x04 | (@as(u8, reg.id() & 0b111) << 3); + self.code.items[self.code.items.len - 8] = REX; + self.code.items[self.code.items.len - 7] = 0x8B; + self.code.items[self.code.items.len - 6] = r; + self.code.items[self.code.items.len - 5] = 0x25; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + } else { + // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load + // the value. + if (reg.id() == 0) { + // REX.W 0xA1 moffs64* + // moffs64* is a 64-bit offset "relative to segment base", which really just means the + // absolute address for all practical purposes. 
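Each case above rebuilds its REX prefix from the b0100WRXB layout by hand. As a reference point, here is a hedged sketch of that layout in one place (the `rex` helper is hypothetical, not part of this codebase):

```zig
const std = @import("std");

/// Sketch of the REX prefix layout 0b0100WRXB described in the comments above.
/// W: 64-bit operand size; R: extends the ModR/M reg field;
/// X: extends the SIB index field; B: extends the ModR/M r/m (or opcode register) field.
fn rex(w: bool, r: bool, x: bool, b: bool) u8 {
    return 0x40 |
        (if (w) @as(u8, 0b1000) else 0) |
        (if (r) @as(u8, 0b0100) else 0) |
        (if (x) @as(u8, 0b0010) else 0) |
        (if (b) @as(u8, 0b0001) else 0);
}

test "rex matches the constants used in genSetReg" {
    // REX.W alone is 0x48 (the 64-bit loads); R+B without W is 0x45 (the xor case).
    std.debug.assert(rex(true, false, false, false) == 0x48);
    std.debug.assert(rex(false, true, false, true) == 0x45);
    std.debug.assert(rex(true, true, false, true) == 0x4D);
}
```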
+ try self.code.resize(self.code.items.len + 10); + // REX.W == 0x48 + self.code.items[self.code.items.len - 10] = 0x48; + self.code.items[self.code.items.len - 9] = 0xA1; + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + } else { + // This requires two instructions; a move imm as used above, followed by an indirect load using the register + // as the address and the register as the destination. + // + // This cannot be used if the lower three bits of the id are equal to four or five, as there + // is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with + // this instruction. + const id3 = @truncate(u3, reg.id()); + std.debug.assert(id3 != 4 and id3 != 5); + + // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. + try self.genSetReg(src, arch, reg, MCValue{ .immediate = x }); + + // Now, the register contains the address of the value to load into it + // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant. + // TODO: determine whether to allow other sized registers, and if so, handle them properly. + // This operation requires three bytes: REX 0x8B R/M + // + // For this operation, we want R/M mode *zero* (use register indirectly), and the two register + // values must match. Thus, it's 00ABCABC where ABC is the lower three bits of the register ID. + // + // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both* + // register operands need to be marked as extended. + const REX = 0x48 | if (reg.isExtended()) @as(u8, 0b0101) else 0; + const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()); + try self.code.appendSlice(&[_]u8{ REX, 0x8B, RM }); + } + } }, - else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}), }, else => return self.fail(src, "TODO implement genSetReg for more architectures", .{}), } @@ -396,30 +566,22 @@ const Function = struct { } } - fn genTypedValue(self: *Function, src: usize, typed_value: ir.TypedValue) !MCValue { + fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue { + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + const allocator = self.code.allocator; switch (typed_value.ty.zigTypeTag()) { .Pointer => { - const ptr_elem_type = typed_value.ty.elemType(); - switch (ptr_elem_type.zigTypeTag()) { - .Array => { - // TODO more checks to make sure this can be emitted as a string literal - const bytes = try typed_value.val.toAllocatedBytes(self.code.allocator); - defer self.code.allocator.free(bytes); - const smaller_len = std.math.cast(u32, bytes.len) catch - return self.fail(src, "TODO handle a larger string constant", .{}); - - // Emit the string literal directly into the code; jump over it. 
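The restriction noted above (no RSP, RBP, R12, or R13) comes from the ModR/M encoding itself: with mod = 00, an r/m of 100 means "a SIB byte follows" and an r/m of 101 means "RIP-relative disp32", so ids 4 and 5 cannot express a plain register-indirect access. A hedged sketch of just that final indirect move (the `encodeDerefSelf` helper and buffer type are assumptions, not this compiler's API):

```zig
const std = @import("std");

/// Sketch: encode `mov r64, [r64]` with the same register as both the address
/// and the destination, as in the two-instruction sequence above.
fn encodeDerefSelf(code: *std.ArrayList(u8), id4: u4) !void {
    const id3 = @as(u8, id4 & 0b111);
    // mod=00 with r/m=100 selects a SIB byte, and r/m=101 selects RIP-relative
    // addressing, so rsp/rbp/r12/r13 cannot be encoded this way.
    if (id3 == 4 or id3 == 5) return error.CannotEncode;
    const extended = (id4 & 0b1000) != 0;
    // REX.W, plus both R and B when the register is extended, since it is both operands.
    const rex_byte: u8 = 0x48 | (if (extended) @as(u8, 0b0101) else 0);
    const modrm = (id3 << 3) | id3; // mod=00: register-indirect
    try code.appendSlice(&[_]u8{ rex_byte, 0x8B, modrm });
}
```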
- try self.genRelativeFwdJump(src, smaller_len); - const offset = self.code.items.len; - try self.code.appendSlice(bytes); - return MCValue{ .embedded_in_code = offset }; - }, - else => |t| return self.fail(src, "TODO implement emitTypedValue for pointer to '{}'", .{@tagName(t)}), + if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { + const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; + const decl = payload.decl; + const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes; + return MCValue{ .memory = got_addr }; } + return self.fail(src, "TODO codegen more kinds of const pointers", .{}); }, .Int => { - const info = typed_value.ty.intInfo(self.module.target); - const ptr_bits = self.module.target.cpu.arch.ptrBitWidth(); + const info = typed_value.ty.intInfo(self.target.*); if (info.bits > ptr_bits or info.signed) { return self.fail(src, "TODO const int bigger than ptr and signed int", .{}); } @@ -433,127 +595,19 @@ const Function = struct { fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } { @setCold(true); - const msg = try std.fmt.allocPrint(self.errors.allocator, format, args); - { - errdefer self.errors.allocator.free(msg); - (try self.errors.addOne()).* = .{ - .byte_offset = src, - .msg = msg, - }; - } + assert(self.err_msg == null); + self.err_msg = try ErrorMsg.create(self.code.allocator, src, format, args); return error.CodegenFail; } }; +const x86_64 = @import("codegen/x86_64.zig"); +const x86 = @import("codegen/x86.zig"); + fn Reg(comptime arch: Target.Cpu.Arch) type { return switch (arch) { - .i386 => enum { - eax, - ebx, - ecx, - edx, - ebp, - esp, - esi, - edi, - - ax, - bx, - cx, - dx, - bp, - sp, - si, - di, - - ah, - bh, - ch, - dh, - - al, - bl, - cl, - dl, - }, - .x86_64 => enum { - rax, - rbx, - rcx, - rdx, - rbp, - rsp, - rsi, - rdi, - r8, - r9, - r10, - r11, - r12, - r13, - r14, - r15, - - eax, - ebx, - ecx, - edx, - ebp, - esp, - esi, - edi, - r8d, - r9d, - r10d, - r11d, - r12d, - r13d, - r14d, - r15d, - - ax, - bx, - cx, - dx, - bp, - sp, - si, - di, - r8w, - r9w, - r10w, - r11w, - r12w, - r13w, - r14w, - r15w, - - ah, - bh, - ch, - dh, - bph, - sph, - sih, - dih, - - al, - bl, - cl, - dl, - bpl, - spl, - sil, - dil, - r8b, - r9b, - r10b, - r11b, - r12b, - r13b, - r14b, - r15b, - }, + .i386 => x86.Register, + .x86_64 => x86_64.Register, else => @compileError("TODO add more register enums"), }; } diff --git a/src-self-hosted/codegen/x86.zig b/src-self-hosted/codegen/x86.zig new file mode 100644 index 0000000000..60872dedb9 --- /dev/null +++ b/src-self-hosted/codegen/x86.zig @@ -0,0 +1,30 @@ +// zig fmt: off +pub const Register = enum(u8) { + // 0 through 7, 32-bit registers. id is int value + eax, ecx, edx, ebx, esp, ebp, esi, edi, + + // 8-15, 16-bit registers. id is int value - 8. + ax, cx, dx, bx, sp, bp, si, di, + + // 16-23, 8-bit registers. id is int value - 16. + al, cl, dl, bl, ah, ch, dh, bh, + + /// Returns the bit-width of the register. + pub fn size(self: @This()) u7 { + return switch (@enumToInt(self)) { + 0...7 => 32, + 8...15 => 16, + 16...23 => 8, + else => unreachable, + }; + } + + /// Returns the register's id. This is used in practically every opcode the + /// x86 has. It is embedded in some instructions, such as the `B8 +rd` move + /// instruction, and is used in the R/M byte. 
+ pub fn id(self: @This()) u3 { + return @truncate(u3, @enumToInt(self)); + } +}; + +// zig fmt: on diff --git a/src-self-hosted/codegen/x86_64.zig b/src-self-hosted/codegen/x86_64.zig new file mode 100644 index 0000000000..0cc008ae1b --- /dev/null +++ b/src-self-hosted/codegen/x86_64.zig @@ -0,0 +1,53 @@ +// zig fmt: off +pub const Register = enum(u8) { + // 0 through 15, 64-bit registers. 8-15 are extended. + // id is just the int value. + rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, + r8, r9, r10, r11, r12, r13, r14, r15, + + // 16 through 31, 32-bit registers. 24-31 are extended. + // id is int value - 16. + eax, ecx, edx, ebx, esp, ebp, esi, edi, + r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d, + + // 32-47, 16-bit registers. 40-47 are extended. + // id is int value - 32. + ax, cx, dx, bx, sp, bp, si, di, + r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w, + + // 48-63, 8-bit registers. 56-63 are extended. + // id is int value - 48. + al, cl, dl, bl, ah, ch, dh, bh, + r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b, + + /// Returns the bit-width of the register. + pub fn size(self: @This()) u7 { + return switch (@enumToInt(self)) { + 0...15 => 64, + 16...31 => 32, + 32...47 => 16, + 48...63 => 8, + else => unreachable, + }; + } + + /// Returns whether the register is *extended*. Extended registers are the + /// new registers added with amd64, r8 through r15. This also includes any + /// other variant of access to those registers, such as r8b, r15d, and so + /// on. This is needed because access to these registers requires special + /// handling via the REX prefix, via the B or R bits, depending on context. + pub fn isExtended(self: @This()) bool { + return @enumToInt(self) & 0x08 != 0; + } + + /// This returns the 4-bit register ID, which is used in practically every + /// opcode. Note that bit 3 (the highest bit) is *never* used directly in + /// an instruction (@see isExtended), and requires special handling. The + /// lower three bits are often embedded directly in instructions (such as + /// the B8 variant of moves), or used in R/M bytes. 
+ pub fn id(self: @This()) u4 { + return @truncate(u4, @enumToInt(self)); + } +}; + +// zig fmt: on diff --git a/src-self-hosted/compilation.zig b/src-self-hosted/compilation.zig deleted file mode 100644 index cee4a48f93..0000000000 --- a/src-self-hosted/compilation.zig +++ /dev/null @@ -1,1434 +0,0 @@ -const std = @import("std"); -const io = std.io; -const mem = std.mem; -const Allocator = mem.Allocator; -const ArrayListSentineled = std.ArrayListSentineled; -const llvm = @import("llvm.zig"); -const c = @import("c.zig"); -const builtin = std.builtin; -const Target = std.Target; -const warn = std.debug.warn; -const Token = std.zig.Token; -const ArrayList = std.ArrayList; -const errmsg = @import("errmsg.zig"); -const ast = std.zig.ast; -const event = std.event; -const assert = std.debug.assert; -const AtomicRmwOp = builtin.AtomicRmwOp; -const AtomicOrder = builtin.AtomicOrder; -const Scope = @import("scope.zig").Scope; -const Decl = @import("decl.zig").Decl; -const ir = @import("ir.zig"); -const Visib = @import("visib.zig").Visib; -const Value = @import("value.zig").Value; -const Type = Value.Type; -const Span = errmsg.Span; -const Msg = errmsg.Msg; -const codegen = @import("codegen.zig"); -const Package = @import("package.zig").Package; -const link = @import("link.zig").link; -const LibCInstallation = @import("libc_installation.zig").LibCInstallation; -const CInt = @import("c_int.zig").CInt; -const fs = std.fs; -const util = @import("util.zig"); - -const max_src_size = 2 * 1024 * 1024 * 1024; // 2 GiB - -/// Data that is local to the event loop. -pub const ZigCompiler = struct { - llvm_handle_pool: std.atomic.Stack(*llvm.Context), - lld_lock: event.Lock, - allocator: *Allocator, - - /// TODO pool these so that it doesn't have to lock - prng: event.Locked(std.rand.DefaultPrng), - - native_libc: event.Future(LibCInstallation), - - var lazy_init_targets = std.once(util.initializeAllTargets); - - pub fn init(allocator: *Allocator) !ZigCompiler { - lazy_init_targets.call(); - - var seed_bytes: [@sizeOf(u64)]u8 = undefined; - try std.crypto.randomBytes(seed_bytes[0..]); - const seed = mem.readIntNative(u64, &seed_bytes); - - return ZigCompiler{ - .allocator = allocator, - .lld_lock = event.Lock.init(), - .llvm_handle_pool = std.atomic.Stack(*llvm.Context).init(), - .prng = event.Locked(std.rand.DefaultPrng).init(std.rand.DefaultPrng.init(seed)), - .native_libc = event.Future(LibCInstallation).init(), - }; - } - - /// Must be called only after EventLoop.run completes. - fn deinit(self: *ZigCompiler) void { - self.lld_lock.deinit(); - while (self.llvm_handle_pool.pop()) |node| { - llvm.ContextDispose(node.data); - self.allocator.destroy(node); - } - } - - /// Gets an exclusive handle on any LlvmContext. - /// Caller must release the handle when done. 
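To make the id scheme of the two new register files concrete, here is a hedged test sketch; the import path is an assumption, and the expected values follow from the declaration order and the hardware encoding the comments describe (AL/CL/DL/BL share ids 0-3 with RAX/RCX/RDX/RBX):

```zig
const std = @import("std");
const x86_64 = @import("x86_64.zig"); // path assumed relative to a sibling test file

test "register ids follow the hardware encoding" {
    std.debug.assert(x86_64.Register.rax.id() == 0);
    std.debug.assert(x86_64.Register.rcx.id() == 1);
    std.debug.assert(x86_64.Register.r9.id() == 9);
    std.debug.assert(x86_64.Register.r9.isExtended());
    std.debug.assert(!x86_64.Register.rbp.isExtended());
    std.debug.assert(x86_64.Register.eax.size() == 32);
    std.debug.assert(x86_64.Register.cl.id() == 1); // cl is the low byte of rcx
    std.debug.assert(x86_64.Register.cl.size() == 8);
}
```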
- pub fn getAnyLlvmContext(self: *ZigCompiler) !LlvmHandle { - if (self.llvm_handle_pool.pop()) |node| return LlvmHandle{ .node = node }; - - const context_ref = llvm.ContextCreate() orelse return error.OutOfMemory; - errdefer llvm.ContextDispose(context_ref); - - const node = try self.allocator.create(std.atomic.Stack(*llvm.Context).Node); - node.* = std.atomic.Stack(*llvm.Context).Node{ - .next = undefined, - .data = context_ref, - }; - errdefer self.allocator.destroy(node); - - return LlvmHandle{ .node = node }; - } - - pub fn getNativeLibC(self: *ZigCompiler) !*LibCInstallation { - if (self.native_libc.start()) |ptr| return ptr; - self.native_libc.data = try LibCInstallation.findNative(.{ .allocator = self.allocator }); - self.native_libc.resolve(); - return &self.native_libc.data; - } - - /// Must be called only once, ever. Sets global state. - pub fn setLlvmArgv(allocator: *Allocator, llvm_argv: []const []const u8) !void { - if (llvm_argv.len != 0) { - var c_compatible_args = try std.cstr.NullTerminated2DArray.fromSlices(allocator, &[_][]const []const u8{ - &[_][]const u8{"zig (LLVM option parsing)"}, - llvm_argv, - }); - defer c_compatible_args.deinit(); - c.ZigLLVMParseCommandLineOptions(llvm_argv.len + 1, c_compatible_args.ptr); - } - } -}; - -pub const LlvmHandle = struct { - node: *std.atomic.Stack(*llvm.Context).Node, - - pub fn release(self: LlvmHandle, zig_compiler: *ZigCompiler) void { - zig_compiler.llvm_handle_pool.push(self.node); - } -}; - -pub const Compilation = struct { - zig_compiler: *ZigCompiler, - name: ArrayListSentineled(u8, 0), - llvm_triple: ArrayListSentineled(u8, 0), - root_src_path: ?[]const u8, - target: std.Target, - llvm_target: *llvm.Target, - build_mode: builtin.Mode, - zig_lib_dir: []const u8, - zig_std_dir: []const u8, - - /// lazily created when we need it - tmp_dir: event.Future(BuildError![]u8) = event.Future(BuildError![]u8).init(), - - version: builtin.Version = builtin.Version{ .major = 0, .minor = 0, .patch = 0 }, - - linker_script: ?[]const u8 = null, - out_h_path: ?[]const u8 = null, - - is_test: bool = false, - strip: bool = false, - is_static: bool, - linker_rdynamic: bool = false, - - clang_argv: []const []const u8 = &[_][]const u8{}, - assembly_files: []const []const u8 = &[_][]const u8{}, - - /// paths that are explicitly provided by the user to link against - link_objects: []const []const u8 = &[_][]const u8{}, - - /// functions that have their own objects that we need to link - /// it uses an optional pointer so that tombstone removals are possible - fn_link_set: event.Locked(FnLinkSet) = event.Locked(FnLinkSet).init(FnLinkSet.init()), - - pub const FnLinkSet = std.TailQueue(?*Value.Fn); - - link_libs_list: ArrayList(*LinkLib), - libc_link_lib: ?*LinkLib = null, - - err_color: errmsg.Color = .Auto, - - verbose_tokenize: bool = false, - verbose_ast_tree: bool = false, - verbose_ast_fmt: bool = false, - verbose_cimport: bool = false, - verbose_ir: bool = false, - verbose_llvm_ir: bool = false, - verbose_link: bool = false, - - link_eh_frame_hdr: bool = false, - - darwin_version_min: DarwinVersionMin = .None, - - test_filters: []const []const u8 = &[_][]const u8{}, - test_name_prefix: ?[]const u8 = null, - - emit_bin: bool = true, - emit_asm: bool = false, - emit_llvm_ir: bool = false, - emit_h: bool = false, - - kind: Kind, - - events: *event.Channel(Event), - - exported_symbol_names: event.Locked(Decl.Table), - - /// Before code generation starts, must wait on this group to make sure - /// the build is complete. 
- prelink_group: event.Group(BuildError!void), - - compile_errors: event.Locked(CompileErrList), - - meta_type: *Type.MetaType, - void_type: *Type.Void, - bool_type: *Type.Bool, - noreturn_type: *Type.NoReturn, - comptime_int_type: *Type.ComptimeInt, - u8_type: *Type.Int, - - void_value: *Value.Void, - true_value: *Value.Bool, - false_value: *Value.Bool, - noreturn_value: *Value.NoReturn, - - target_machine: *llvm.TargetMachine, - target_data_ref: *llvm.TargetData, - target_layout_str: [*:0]u8, - target_ptr_bits: u32, - - /// for allocating things which have the same lifetime as this Compilation - arena_allocator: std.heap.ArenaAllocator, - - root_package: *Package, - std_package: *Package, - - override_libc: ?*LibCInstallation = null, - - /// need to wait on this group before deinitializing - deinit_group: event.Group(void), - - destroy_frame: *@Frame(createAsync), - main_loop_frame: *@Frame(Compilation.mainLoop), - main_loop_future: event.Future(void) = event.Future(void).init(), - - have_err_ret_tracing: bool = false, - - /// not locked because it is read-only - primitive_type_table: TypeTable, - - int_type_table: event.Locked(IntTypeTable), - array_type_table: event.Locked(ArrayTypeTable), - ptr_type_table: event.Locked(PtrTypeTable), - fn_type_table: event.Locked(FnTypeTable), - - c_int_types: [CInt.list.len]*Type.Int, - - fs_watch: *fs.Watch(*Scope.Root), - - cancelled: bool = false, - - const IntTypeTable = std.HashMap(*const Type.Int.Key, *Type.Int, Type.Int.Key.hash, Type.Int.Key.eql); - const ArrayTypeTable = std.HashMap(*const Type.Array.Key, *Type.Array, Type.Array.Key.hash, Type.Array.Key.eql); - const PtrTypeTable = std.HashMap(*const Type.Pointer.Key, *Type.Pointer, Type.Pointer.Key.hash, Type.Pointer.Key.eql); - const FnTypeTable = std.HashMap(*const Type.Fn.Key, *Type.Fn, Type.Fn.Key.hash, Type.Fn.Key.eql); - const TypeTable = std.StringHashMap(*Type); - - const CompileErrList = std.ArrayList(*Msg); - - // TODO handle some of these earlier and report them in a way other than error codes - pub const BuildError = error{ - OutOfMemory, - EndOfStream, - IsDir, - Unexpected, - SystemResources, - SharingViolation, - PathAlreadyExists, - FileNotFound, - AccessDenied, - PipeBusy, - FileTooBig, - SymLinkLoop, - ProcessFdQuotaExceeded, - NameTooLong, - SystemFdQuotaExceeded, - NoDevice, - NoSpaceLeft, - NotDir, - FileSystem, - OperationAborted, - IoPending, - BrokenPipe, - WouldBlock, - FileClosed, - DestinationAddressRequired, - DiskQuota, - InputOutput, - NoStdHandles, - Overflow, - NotSupported, - BufferTooSmall, - Unimplemented, // TODO remove this one - SemanticAnalysisFailed, // TODO remove this one - ReadOnlyFileSystem, - LinkQuotaExceeded, - EnvironmentVariableNotFound, - AppDataDirUnavailable, - LinkFailed, - LibCRequiredButNotProvidedOrFound, - LibCMissingDynamicLinker, - InvalidDarwinVersionString, - UnsupportedLinkArchitecture, - UserResourceLimitReached, - InvalidUtf8, - BadPathName, - DeviceBusy, - CurrentWorkingDirectoryUnlinked, - }; - - pub const Event = union(enum) { - Ok, - Error: BuildError, - Fail: []*Msg, - }; - - pub const DarwinVersionMin = union(enum) { - None, - MacOS: []const u8, - Ios: []const u8, - }; - - pub const Kind = enum { - Exe, - Lib, - Obj, - }; - - pub const LinkLib = struct { - name: []const u8, - path: ?[]const u8, - - /// the list of symbols we depend on from this lib - symbols: ArrayList([]u8), - provided_explicitly: bool, - }; - - pub const Emit = enum { - Binary, - Assembly, - LlvmIr, - }; - - pub fn create( - zig_compiler: *ZigCompiler, 
- name: []const u8, - root_src_path: ?[]const u8, - target: std.zig.CrossTarget, - kind: Kind, - build_mode: builtin.Mode, - is_static: bool, - zig_lib_dir: []const u8, - ) !*Compilation { - var optional_comp: ?*Compilation = null; - var frame = try zig_compiler.allocator.create(@Frame(createAsync)); - errdefer zig_compiler.allocator.destroy(frame); - frame.* = async createAsync( - &optional_comp, - zig_compiler, - name, - root_src_path, - target, - kind, - build_mode, - is_static, - zig_lib_dir, - ); - // TODO causes segfault - // return optional_comp orelse if (await frame) |_| unreachable else |err| err; - if (optional_comp) |comp| { - return comp; - } else if (await frame) |_| unreachable else |err| return err; - } - - async fn createAsync( - out_comp: *?*Compilation, - zig_compiler: *ZigCompiler, - name: []const u8, - root_src_path: ?[]const u8, - cross_target: std.zig.CrossTarget, - kind: Kind, - build_mode: builtin.Mode, - is_static: bool, - zig_lib_dir: []const u8, - ) !void { - const allocator = zig_compiler.allocator; - - // TODO merge this line with stage2.zig crossTargetToTarget - const target_info = try std.zig.system.NativeTargetInfo.detect(std.heap.c_allocator, cross_target); - const target = target_info.target; - - var comp = Compilation{ - .arena_allocator = std.heap.ArenaAllocator.init(allocator), - .zig_compiler = zig_compiler, - .events = undefined, - .root_src_path = root_src_path, - .target = target, - .llvm_target = undefined, - .kind = kind, - .build_mode = build_mode, - .zig_lib_dir = zig_lib_dir, - .zig_std_dir = undefined, - .destroy_frame = @frame(), - .main_loop_frame = undefined, - - .name = undefined, - .llvm_triple = undefined, - .is_static = is_static, - .link_libs_list = undefined, - .exported_symbol_names = event.Locked(Decl.Table).init(Decl.Table.init(allocator)), - .prelink_group = event.Group(BuildError!void).init(allocator), - .deinit_group = event.Group(void).init(allocator), - .compile_errors = event.Locked(CompileErrList).init(CompileErrList.init(allocator)), - .int_type_table = event.Locked(IntTypeTable).init(IntTypeTable.init(allocator)), - .array_type_table = event.Locked(ArrayTypeTable).init(ArrayTypeTable.init(allocator)), - .ptr_type_table = event.Locked(PtrTypeTable).init(PtrTypeTable.init(allocator)), - .fn_type_table = event.Locked(FnTypeTable).init(FnTypeTable.init(allocator)), - .c_int_types = undefined, - - .meta_type = undefined, - .void_type = undefined, - .void_value = undefined, - .bool_type = undefined, - .true_value = undefined, - .false_value = undefined, - .noreturn_type = undefined, - .noreturn_value = undefined, - .comptime_int_type = undefined, - .u8_type = undefined, - - .target_machine = undefined, - .target_data_ref = undefined, - .target_layout_str = undefined, - .target_ptr_bits = target.cpu.arch.ptrBitWidth(), - - .root_package = undefined, - .std_package = undefined, - - .primitive_type_table = undefined, - - .fs_watch = undefined, - }; - comp.link_libs_list = ArrayList(*LinkLib).init(comp.arena()); - comp.primitive_type_table = TypeTable.init(comp.arena()); - - defer { - comp.int_type_table.private_data.deinit(); - comp.array_type_table.private_data.deinit(); - comp.ptr_type_table.private_data.deinit(); - comp.fn_type_table.private_data.deinit(); - comp.arena_allocator.deinit(); - } - - comp.name = try ArrayListSentineled(u8, 0).init(comp.arena(), name); - comp.llvm_triple = try util.getLLVMTriple(comp.arena(), target); - comp.llvm_target = try util.llvmTargetFromTriple(comp.llvm_triple); - comp.zig_std_dir = try 
fs.path.join(comp.arena(), &[_][]const u8{ zig_lib_dir, "std" }); - - const opt_level = switch (build_mode) { - .Debug => llvm.CodeGenLevelNone, - else => llvm.CodeGenLevelAggressive, - }; - - const reloc_mode = if (is_static) llvm.RelocStatic else llvm.RelocPIC; - - var target_specific_cpu_args: ?[*:0]u8 = null; - var target_specific_cpu_features: ?[*:0]u8 = null; - defer llvm.DisposeMessage(target_specific_cpu_args); - defer llvm.DisposeMessage(target_specific_cpu_features); - - // TODO detect native CPU & features here - - comp.target_machine = llvm.CreateTargetMachine( - comp.llvm_target, - comp.llvm_triple.span(), - target_specific_cpu_args orelse "", - target_specific_cpu_features orelse "", - opt_level, - reloc_mode, - llvm.CodeModelDefault, - false, // TODO: add -ffunction-sections option - ) orelse return error.OutOfMemory; - defer llvm.DisposeTargetMachine(comp.target_machine); - - comp.target_data_ref = llvm.CreateTargetDataLayout(comp.target_machine) orelse return error.OutOfMemory; - defer llvm.DisposeTargetData(comp.target_data_ref); - - comp.target_layout_str = llvm.CopyStringRepOfTargetData(comp.target_data_ref) orelse return error.OutOfMemory; - defer llvm.DisposeMessage(comp.target_layout_str); - - comp.events = try allocator.create(event.Channel(Event)); - defer allocator.destroy(comp.events); - - comp.events.init(&[0]Event{}); - defer comp.events.deinit(); - - if (root_src_path) |root_src| { - const dirname = fs.path.dirname(root_src) orelse "."; - const basename = fs.path.basename(root_src); - - comp.root_package = try Package.create(comp.arena(), dirname, basename); - comp.std_package = try Package.create(comp.arena(), comp.zig_std_dir, "std.zig"); - try comp.root_package.add("std", comp.std_package); - } else { - comp.root_package = try Package.create(comp.arena(), ".", ""); - } - - comp.fs_watch = try fs.Watch(*Scope.Root).init(allocator, 16); - defer comp.fs_watch.deinit(); - - try comp.initTypes(); - defer comp.primitive_type_table.deinit(); - - comp.main_loop_frame = try allocator.create(@Frame(mainLoop)); - defer allocator.destroy(comp.main_loop_frame); - - comp.main_loop_frame.* = async comp.mainLoop(); - // Set this to indicate that initialization completed successfully. - // from here on out we must not return an error. - // This must occur before the first suspend/await. - out_comp.* = &comp; - // This suspend is resumed by destroy() - suspend; - // From here on is cleanup. 
- - comp.deinit_group.wait(); - - if (comp.tmp_dir.getOrNull()) |tmp_dir_result| - if (tmp_dir_result.*) |tmp_dir| { - fs.cwd().deleteTree(tmp_dir) catch {}; - } else |_| {}; - } - - /// it does ref the result because it could be an arbitrary integer size - pub fn getPrimitiveType(comp: *Compilation, name: []const u8) !?*Type { - if (name.len >= 2) { - switch (name[0]) { - 'i', 'u' => blk: { - for (name[1..]) |byte| - switch (byte) { - '0'...'9' => {}, - else => break :blk, - }; - const is_signed = name[0] == 'i'; - const bit_count = std.fmt.parseUnsigned(u32, name[1..], 10) catch |err| switch (err) { - error.Overflow => return error.Overflow, - error.InvalidCharacter => unreachable, // we just checked the characters above - }; - const int_type = try Type.Int.get(comp, Type.Int.Key{ - .bit_count = bit_count, - .is_signed = is_signed, - }); - errdefer int_type.base.base.deref(); - return &int_type.base; - }, - else => {}, - } - } - - if (comp.primitive_type_table.get(name)) |entry| { - entry.value.base.ref(); - return entry.value; - } - - return null; - } - - fn initTypes(comp: *Compilation) !void { - comp.meta_type = try comp.arena().create(Type.MetaType); - comp.meta_type.* = Type.MetaType{ - .base = Type{ - .name = "type", - .base = Value{ - .id = .Type, - .typ = undefined, - .ref_count = std.atomic.Int(usize).init(3), // 3 because it references itself twice - }, - .id = .Type, - .abi_alignment = Type.AbiAlignment.init(), - }, - .value = undefined, - }; - comp.meta_type.value = &comp.meta_type.base; - comp.meta_type.base.base.typ = &comp.meta_type.base; - assert((try comp.primitive_type_table.put(comp.meta_type.base.name, &comp.meta_type.base)) == null); - - comp.void_type = try comp.arena().create(Type.Void); - comp.void_type.* = Type.Void{ - .base = Type{ - .name = "void", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Void, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.void_type.base.name, &comp.void_type.base)) == null); - - comp.noreturn_type = try comp.arena().create(Type.NoReturn); - comp.noreturn_type.* = Type.NoReturn{ - .base = Type{ - .name = "noreturn", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .NoReturn, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.noreturn_type.base.name, &comp.noreturn_type.base)) == null); - - comp.comptime_int_type = try comp.arena().create(Type.ComptimeInt); - comp.comptime_int_type.* = Type.ComptimeInt{ - .base = Type{ - .name = "comptime_int", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .ComptimeInt, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.comptime_int_type.base.name, &comp.comptime_int_type.base)) == null); - - comp.bool_type = try comp.arena().create(Type.Bool); - comp.bool_type.* = Type.Bool{ - .base = Type{ - .name = "bool", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Bool, - .abi_alignment = Type.AbiAlignment.init(), - }, - }; - assert((try comp.primitive_type_table.put(comp.bool_type.base.name, &comp.bool_type.base)) == null); - - comp.void_value = try comp.arena().create(Value.Void); - comp.void_value.* = Value.Void{ - .base 
= Value{ - .id = .Void, - .typ = &Type.Void.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - }; - - comp.true_value = try comp.arena().create(Value.Bool); - comp.true_value.* = Value.Bool{ - .base = Value{ - .id = .Bool, - .typ = &Type.Bool.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .x = true, - }; - - comp.false_value = try comp.arena().create(Value.Bool); - comp.false_value.* = Value.Bool{ - .base = Value{ - .id = .Bool, - .typ = &Type.Bool.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .x = false, - }; - - comp.noreturn_value = try comp.arena().create(Value.NoReturn); - comp.noreturn_value.* = Value.NoReturn{ - .base = Value{ - .id = .NoReturn, - .typ = &Type.NoReturn.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - }; - - for (CInt.list) |cint, i| { - const c_int_type = try comp.arena().create(Type.Int); - c_int_type.* = Type.Int{ - .base = Type{ - .name = cint.zig_name, - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Int, - .abi_alignment = Type.AbiAlignment.init(), - }, - .key = Type.Int.Key{ - .is_signed = cint.is_signed, - .bit_count = cint.sizeInBits(comp.target), - }, - .garbage_node = undefined, - }; - comp.c_int_types[i] = c_int_type; - assert((try comp.primitive_type_table.put(cint.zig_name, &c_int_type.base)) == null); - } - comp.u8_type = try comp.arena().create(Type.Int); - comp.u8_type.* = Type.Int{ - .base = Type{ - .name = "u8", - .base = Value{ - .id = .Type, - .typ = &Type.MetaType.get(comp).base, - .ref_count = std.atomic.Int(usize).init(1), - }, - .id = .Int, - .abi_alignment = Type.AbiAlignment.init(), - }, - .key = Type.Int.Key{ - .is_signed = false, - .bit_count = 8, - }, - .garbage_node = undefined, - }; - assert((try comp.primitive_type_table.put(comp.u8_type.base.name, &comp.u8_type.base)) == null); - } - - pub fn destroy(self: *Compilation) void { - const allocator = self.gpa(); - self.cancelled = true; - await self.main_loop_frame; - resume self.destroy_frame; - allocator.destroy(self.destroy_frame); - } - - fn start(self: *Compilation) void { - self.main_loop_future.resolve(); - } - - async fn mainLoop(self: *Compilation) void { - // wait until start() is called - _ = self.main_loop_future.get(); - - var build_result = self.initialCompile(); - - while (!self.cancelled) { - const link_result = if (build_result) blk: { - break :blk self.maybeLink(); - } else |err| err; - // this makes a handy error return trace and stack trace in debug mode - if (std.debug.runtime_safety) { - link_result catch unreachable; - } - - const compile_errors = blk: { - const held = self.compile_errors.acquire(); - defer held.release(); - break :blk held.value.toOwnedSlice(); - }; - - if (link_result) |_| { - if (compile_errors.len == 0) { - self.events.put(Event.Ok); - } else { - self.events.put(Event{ .Fail = compile_errors }); - } - } else |err| { - // if there's an error then the compile errors have dangling references - self.gpa().free(compile_errors); - - self.events.put(Event{ .Error = err }); - } - - // First, get an item from the watch channel, waiting on the channel. 
- var group = event.Group(BuildError!void).init(self.gpa()); - { - const ev = (self.fs_watch.channel.get()) catch |err| { - build_result = err; - continue; - }; - const root_scope = ev.data; - group.call(rebuildFile, .{ self, root_scope }) catch |err| { - build_result = err; - continue; - }; - } - // Next, get all the items from the channel that are buffered up. - while (self.fs_watch.channel.getOrNull()) |ev_or_err| { - if (ev_or_err) |ev| { - const root_scope = ev.data; - group.call(rebuildFile, .{ self, root_scope }) catch |err| { - build_result = err; - continue; - }; - } else |err| { - build_result = err; - continue; - } - } - build_result = group.wait(); - } - } - - async fn rebuildFile(self: *Compilation, root_scope: *Scope.Root) BuildError!void { - const tree_scope = blk: { - const source_code = fs.cwd().readFileAlloc( - self.gpa(), - root_scope.realpath, - max_src_size, - ) catch |err| { - try self.addCompileErrorCli(root_scope.realpath, "unable to open: {}", .{@errorName(err)}); - return; - }; - errdefer self.gpa().free(source_code); - - const tree = try std.zig.parse(self.gpa(), source_code); - errdefer { - tree.deinit(); - } - - break :blk try Scope.AstTree.create(self, tree, root_scope); - }; - defer tree_scope.base.deref(self); - - var error_it = tree_scope.tree.errors.iterator(0); - while (error_it.next()) |parse_error| { - const msg = try Msg.createFromParseErrorAndScope(self, tree_scope, parse_error); - errdefer msg.destroy(); - - try self.addCompileErrorAsync(msg); - } - if (tree_scope.tree.errors.len != 0) { - return; - } - - const locked_table = root_scope.decls.table.acquireWrite(); - defer locked_table.release(); - - var decl_group = event.Group(BuildError!void).init(self.gpa()); - - try self.rebuildChangedDecls( - &decl_group, - locked_table.value, - root_scope.decls, - &tree_scope.tree.root_node.decls, - tree_scope, - ); - - try decl_group.wait(); - } - - fn rebuildChangedDecls( - self: *Compilation, - group: *event.Group(BuildError!void), - locked_table: *Decl.Table, - decl_scope: *Scope.Decls, - ast_decls: *ast.Node.Root.DeclList, - tree_scope: *Scope.AstTree, - ) !void { - var existing_decls = try locked_table.clone(); - defer existing_decls.deinit(); - - var ast_it = ast_decls.iterator(0); - while (ast_it.next()) |decl_ptr| { - const decl = decl_ptr.*; - switch (decl.id) { - .Comptime => { - const comptime_node = @fieldParentPtr(ast.Node.Comptime, "base", decl); - - // TODO connect existing comptime decls to updated source files - - try self.prelink_group.call(addCompTimeBlock, .{ self, tree_scope, &decl_scope.base, comptime_node }); - }, - .VarDecl => @panic("TODO"), - .FnProto => { - const fn_proto = @fieldParentPtr(ast.Node.FnProto, "base", decl); - - const name = if (fn_proto.name_token) |name_token| tree_scope.tree.tokenSlice(name_token) else { - try self.addCompileError(tree_scope, Span{ - .first = fn_proto.fn_token, - .last = fn_proto.fn_token + 1, - }, "missing function name", .{}); - continue; - }; - - if (existing_decls.remove(name)) |entry| { - // compare new code to existing - if (entry.value.cast(Decl.Fn)) |existing_fn_decl| { - // Just compare the old bytes to the new bytes of the top level decl. - // Even if the AST is technically the same, we want error messages to display - // from the most recent source. 
- const old_decl_src = existing_fn_decl.base.tree_scope.tree.getNodeSource( - &existing_fn_decl.fn_proto.base, - ); - const new_decl_src = tree_scope.tree.getNodeSource(&fn_proto.base); - if (mem.eql(u8, old_decl_src, new_decl_src)) { - // it's the same, we can skip this decl - continue; - } else { - @panic("TODO decl changed implementation"); - // Add the new thing before dereferencing the old thing. This way we don't end - // up pointlessly re-creating things we end up using in the new thing. - } - } else { - @panic("TODO decl changed kind"); - } - } else { - // add new decl - const fn_decl = try self.gpa().create(Decl.Fn); - fn_decl.* = Decl.Fn{ - .base = Decl{ - .id = Decl.Id.Fn, - .name = name, - .visib = parseVisibToken(tree_scope.tree, fn_proto.visib_token), - .resolution = event.Future(BuildError!void).init(), - .parent_scope = &decl_scope.base, - .tree_scope = tree_scope, - }, - .value = .Unresolved, - .fn_proto = fn_proto, - }; - tree_scope.base.ref(); - errdefer self.gpa().destroy(fn_decl); - - try group.call(addTopLevelDecl, .{ self, &fn_decl.base, locked_table }); - } - }, - .TestDecl => @panic("TODO"), - else => unreachable, - } - } - - var existing_decl_it = existing_decls.iterator(); - while (existing_decl_it.next()) |entry| { - // this decl was deleted - const existing_decl = entry.value; - @panic("TODO handle decl deletion"); - } - } - - fn initialCompile(self: *Compilation) !void { - if (self.root_src_path) |root_src_path| { - const root_scope = blk: { - // TODO async/await fs.realpath - const root_src_real_path = fs.realpathAlloc(self.gpa(), root_src_path) catch |err| { - try self.addCompileErrorCli(root_src_path, "unable to open: {}", .{@errorName(err)}); - return; - }; - errdefer self.gpa().free(root_src_real_path); - - break :blk try Scope.Root.create(self, root_src_real_path); - }; - defer root_scope.base.deref(self); - - // assert((try self.fs_watch.addFile(root_scope.realpath, root_scope)) == null); - try self.rebuildFile(root_scope); - } - } - - fn maybeLink(self: *Compilation) !void { - (self.prelink_group.wait()) catch |err| switch (err) { - error.SemanticAnalysisFailed => {}, - else => return err, - }; - - const any_prelink_errors = blk: { - const compile_errors = self.compile_errors.acquire(); - defer compile_errors.release(); - - break :blk compile_errors.value.len != 0; - }; - - if (!any_prelink_errors) { - try link(self); - } - } - - /// caller takes ownership of resulting Code - async fn genAndAnalyzeCode( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - node: *ast.Node, - expected_type: ?*Type, - ) !*ir.Code { - const unanalyzed_code = try ir.gen( - comp, - node, - tree_scope, - scope, - ); - defer unanalyzed_code.destroy(comp.gpa()); - - if (comp.verbose_ir) { - std.debug.warn("unanalyzed:\n", .{}); - unanalyzed_code.dump(); - } - - const analyzed_code = try ir.analyze( - comp, - unanalyzed_code, - expected_type, - ); - errdefer analyzed_code.destroy(comp.gpa()); - - if (comp.verbose_ir) { - std.debug.warn("analyzed:\n", .{}); - analyzed_code.dump(); - } - - return analyzed_code; - } - - async fn addCompTimeBlock( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - comptime_node: *ast.Node.Comptime, - ) BuildError!void { - const void_type = Type.Void.get(comp); - defer void_type.base.base.deref(comp); - - const analyzed_code = genAndAnalyzeCode( - comp, - tree_scope, - scope, - comptime_node.expr, - &void_type.base, - ) catch |err| switch (err) { - // This poison value should not cause the errdefers to run. 
It simply means - // that comp.compile_errors is populated. - error.SemanticAnalysisFailed => return {}, - else => return err, - }; - analyzed_code.destroy(comp.gpa()); - } - - async fn addTopLevelDecl( - self: *Compilation, - decl: *Decl, - locked_table: *Decl.Table, - ) BuildError!void { - const is_export = decl.isExported(decl.tree_scope.tree); - - if (is_export) { - try self.prelink_group.call(verifyUniqueSymbol, .{ self, decl }); - try self.prelink_group.call(resolveDecl, .{ self, decl }); - } - - const gop = try locked_table.getOrPut(decl.name); - if (gop.found_existing) { - try self.addCompileError(decl.tree_scope, decl.getSpan(), "redefinition of '{}'", .{decl.name}); - // TODO note: other definition here - } else { - gop.kv.value = decl; - } - } - - fn addCompileError(self: *Compilation, tree_scope: *Scope.AstTree, span: Span, comptime fmt: []const u8, args: var) !void { - const text = try std.fmt.allocPrint(self.gpa(), fmt, args); - errdefer self.gpa().free(text); - - const msg = try Msg.createFromScope(self, tree_scope, span, text); - errdefer msg.destroy(); - - try self.prelink_group.call(addCompileErrorAsync, .{ self, msg }); - } - - fn addCompileErrorCli(self: *Compilation, realpath: []const u8, comptime fmt: []const u8, args: var) !void { - const text = try std.fmt.allocPrint(self.gpa(), fmt, args); - errdefer self.gpa().free(text); - - const msg = try Msg.createFromCli(self, realpath, text); - errdefer msg.destroy(); - - try self.prelink_group.call(addCompileErrorAsync, .{ self, msg }); - } - - async fn addCompileErrorAsync( - self: *Compilation, - msg: *Msg, - ) BuildError!void { - errdefer msg.destroy(); - - const compile_errors = self.compile_errors.acquire(); - defer compile_errors.release(); - - try compile_errors.value.append(msg); - } - - async fn verifyUniqueSymbol(self: *Compilation, decl: *Decl) BuildError!void { - const exported_symbol_names = self.exported_symbol_names.acquire(); - defer exported_symbol_names.release(); - - if (try exported_symbol_names.value.put(decl.name, decl)) |other_decl| { - try self.addCompileError(decl.tree_scope, decl.getSpan(), "exported symbol collision: '{}'", .{ - decl.name, - }); - // TODO add error note showing location of other symbol - } - } - - pub fn haveLibC(self: *Compilation) bool { - return self.libc_link_lib != null; - } - - pub fn addLinkLib(self: *Compilation, name: []const u8, provided_explicitly: bool) !*LinkLib { - const is_libc = mem.eql(u8, name, "c"); - - if (is_libc) { - if (self.libc_link_lib) |libc_link_lib| { - return libc_link_lib; - } - } - - for (self.link_libs_list.span()) |existing_lib| { - if (mem.eql(u8, name, existing_lib.name)) { - return existing_lib; - } - } - - const link_lib = try self.gpa().create(LinkLib); - link_lib.* = LinkLib{ - .name = name, - .path = null, - .provided_explicitly = provided_explicitly, - .symbols = ArrayList([]u8).init(self.gpa()), - }; - try self.link_libs_list.append(link_lib); - if (is_libc) { - self.libc_link_lib = link_lib; - - // get a head start on looking for the native libc - // TODO this is missing a bunch of logic related to whether the target is native - // and whether we can build libc - if (self.override_libc == null) { - try self.deinit_group.call(startFindingNativeLibC, .{self}); - } - } - return link_lib; - } - - async fn startFindingNativeLibC(self: *Compilation) void { - event.Loop.startCpuBoundOperation(); - // we don't care if it fails, we're just trying to kick off the future resolution - _ = self.zig_compiler.getNativeLibC() catch return; - } - - /// 
General Purpose Allocator. Must free when done. - fn gpa(self: Compilation) *mem.Allocator { - return self.zig_compiler.allocator; - } - - /// Arena Allocator. Automatically freed when the Compilation is destroyed. - fn arena(self: *Compilation) *mem.Allocator { - return &self.arena_allocator.allocator; - } - - /// If the temporary directory for this compilation has not been created, it creates it. - /// Then it creates a random file name in that dir and returns it. - pub fn createRandomOutputPath(self: *Compilation, suffix: []const u8) !ArrayListSentineled(u8, 0) { - const tmp_dir = try self.getTmpDir(); - const file_prefix = self.getRandomFileName(); - - const file_name = try std.fmt.allocPrint(self.gpa(), "{}{}", .{ file_prefix[0..], suffix }); - defer self.gpa().free(file_name); - - const full_path = try fs.path.join(self.gpa(), &[_][]const u8{ tmp_dir, file_name[0..] }); - errdefer self.gpa().free(full_path); - - return ArrayListSentineled(u8, 0).fromOwnedSlice(self.gpa(), full_path); - } - - /// If the temporary directory for this Compilation has not been created, creates it. - /// Then returns it. The directory is unique to this Compilation and cleaned up when - /// the Compilation deinitializes. - fn getTmpDir(self: *Compilation) ![]const u8 { - if (self.tmp_dir.start()) |ptr| return ptr.*; - self.tmp_dir.data = self.getTmpDirImpl(); - self.tmp_dir.resolve(); - return self.tmp_dir.data; - } - - fn getTmpDirImpl(self: *Compilation) ![]u8 { - const comp_dir_name = self.getRandomFileName(); - const zig_dir_path = try getZigDir(self.gpa()); - defer self.gpa().free(zig_dir_path); - - const tmp_dir = try fs.path.join(self.arena(), &[_][]const u8{ zig_dir_path, comp_dir_name[0..] }); - try fs.cwd().makePath(tmp_dir); - return tmp_dir; - } - - fn getRandomFileName(self: *Compilation) [12]u8 { - // here we replace the standard +/ with -_ so that it can be used in a file name - const b64_fs_encoder = std.base64.Base64Encoder.init( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", - std.base64.standard_pad_char, - ); - - var rand_bytes: [9]u8 = undefined; - - { - const held = self.zig_compiler.prng.acquire(); - defer held.release(); - - held.value.random.bytes(rand_bytes[0..]); - } - - var result: [12]u8 = undefined; - b64_fs_encoder.encode(result[0..], &rand_bytes); - return result; - } - - fn registerGarbage(comp: *Compilation, comptime T: type, node: *std.atomic.Stack(*T).Node) void { - // TODO put the garbage somewhere - } - - /// Returns a value which has been ref()'d once - fn analyzeConstValue( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - node: *ast.Node, - expected_type: *Type, - ) !*Value { - var frame = try comp.gpa().create(@Frame(genAndAnalyzeCode)); - defer comp.gpa().destroy(frame); - frame.* = async comp.genAndAnalyzeCode(tree_scope, scope, node, expected_type); - const analyzed_code = try await frame; - defer analyzed_code.destroy(comp.gpa()); - - return analyzed_code.getCompTimeResult(comp); - } - - fn analyzeTypeExpr(comp: *Compilation, tree_scope: *Scope.AstTree, scope: *Scope, node: *ast.Node) !*Type { - const meta_type = &Type.MetaType.get(comp).base; - defer meta_type.base.deref(comp); - - const result_val = try comp.analyzeConstValue(tree_scope, scope, node, meta_type); - errdefer result_val.base.deref(comp); - - return result_val.cast(Type).?; - } - - /// This declaration has been blessed as going into the final code generation. 
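`getTmpDir` above (and `resolveDecl` just below) both lean on the same `std.event.Future` idiom used throughout this now-deleted event-loop design: the first caller to `start()` performs the work once, and later or concurrent callers receive the memoized result. A bare sketch of the pattern under those assumptions (`Thing` and `expensiveInit` are placeholders, not names from this codebase):

```zig
const std = @import("std");

const Thing = struct {
    cached: std.event.Future(u64) = std.event.Future(u64).init(),

    fn expensiveInit() u64 {
        return 42; // placeholder for real one-time work
    }

    /// First caller computes the value; everyone else gets the resolved result.
    fn getOnce(self: *Thing) u64 {
        if (self.cached.start()) |ptr| return ptr.*;
        self.cached.data = Thing.expensiveInit();
        self.cached.resolve();
        return self.cached.data;
    }
};
```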
- pub async fn resolveDecl(comp: *Compilation, decl: *Decl) BuildError!void { - if (decl.resolution.start()) |ptr| return ptr.*; - - decl.resolution.data = try generateDecl(comp, decl); - decl.resolution.resolve(); - return decl.resolution.data; - } -}; - -fn parseVisibToken(tree: *ast.Tree, optional_token_index: ?ast.TokenIndex) Visib { - if (optional_token_index) |token_index| { - const token = tree.tokens.at(token_index); - assert(token.id == Token.Id.Keyword_pub); - return Visib.Pub; - } else { - return Visib.Private; - } -} - -/// The function that actually does the generation. -fn generateDecl(comp: *Compilation, decl: *Decl) !void { - switch (decl.id) { - .Var => @panic("TODO"), - .Fn => { - const fn_decl = @fieldParentPtr(Decl.Fn, "base", decl); - return generateDeclFn(comp, fn_decl); - }, - .CompTime => @panic("TODO"), - } -} - -fn generateDeclFn(comp: *Compilation, fn_decl: *Decl.Fn) !void { - const tree_scope = fn_decl.base.tree_scope; - - const body_node = fn_decl.fn_proto.body_node orelse return generateDeclFnProto(comp, fn_decl); - - const fndef_scope = try Scope.FnDef.create(comp, fn_decl.base.parent_scope); - defer fndef_scope.base.deref(comp); - - const fn_type = try analyzeFnType(comp, tree_scope, fn_decl.base.parent_scope, fn_decl.fn_proto); - defer fn_type.base.base.deref(comp); - - var symbol_name = try std.ArrayListSentineled(u8, 0).init(comp.gpa(), fn_decl.base.name); - var symbol_name_consumed = false; - errdefer if (!symbol_name_consumed) symbol_name.deinit(); - - // The Decl.Fn owns the initial 1 reference count - const fn_val = try Value.Fn.create(comp, fn_type, fndef_scope, symbol_name); - fn_decl.value = .{ .Fn = fn_val }; - symbol_name_consumed = true; - - // Define local parameter variables - for (fn_type.key.data.Normal.params) |param, i| { - //AstNode *param_decl_node = get_param_decl_node(fn_table_entry, i); - const param_decl = @fieldParentPtr(ast.Node.ParamDecl, "base", fn_decl.fn_proto.params.at(i).*); - const name_token = param_decl.name_token orelse { - try comp.addCompileError(tree_scope, Span{ - .first = param_decl.firstToken(), - .last = param_decl.type_node.firstToken(), - }, "missing parameter name", .{}); - return error.SemanticAnalysisFailed; - }; - const param_name = tree_scope.tree.tokenSlice(name_token); - - // if (is_noalias && get_codegen_ptr_type(param_type) == nullptr) { - // add_node_error(g, param_decl_node, buf_sprintf("noalias on non-pointer parameter")); - // } - - // TODO check for shadowing - - const var_scope = try Scope.Var.createParam( - comp, - fn_val.child_scope, - param_name, - &param_decl.base, - i, - param.typ, - ); - fn_val.child_scope = &var_scope.base; - - try fn_type.non_key.Normal.variable_list.append(var_scope); - } - - var frame = try comp.gpa().create(@Frame(Compilation.genAndAnalyzeCode)); - defer comp.gpa().destroy(frame); - frame.* = async comp.genAndAnalyzeCode( - tree_scope, - fn_val.child_scope, - body_node, - fn_type.key.data.Normal.return_type, - ); - const analyzed_code = try await frame; - errdefer analyzed_code.destroy(comp.gpa()); - - assert(fn_val.block_scope != null); - - // Kick off rendering to LLVM module, but it doesn't block the fn decl - // analysis from being complete. 
- try comp.prelink_group.call(codegen.renderToLlvm, .{ comp, fn_val, analyzed_code }); - try comp.prelink_group.call(addFnToLinkSet, .{ comp, fn_val }); -} - -async fn addFnToLinkSet(comp: *Compilation, fn_val: *Value.Fn) Compilation.BuildError!void { - fn_val.base.ref(); - defer fn_val.base.deref(comp); - - fn_val.link_set_node.data = fn_val; - - const held = comp.fn_link_set.acquire(); - defer held.release(); - - held.value.append(fn_val.link_set_node); -} - -fn getZigDir(allocator: *mem.Allocator) ![]u8 { - return fs.getAppDataDir(allocator, "zig"); -} - -fn analyzeFnType( - comp: *Compilation, - tree_scope: *Scope.AstTree, - scope: *Scope, - fn_proto: *ast.Node.FnProto, -) !*Type.Fn { - const return_type_node = switch (fn_proto.return_type) { - .Explicit => |n| n, - .InferErrorSet => |n| n, - }; - const return_type = try comp.analyzeTypeExpr(tree_scope, scope, return_type_node); - return_type.base.deref(comp); - - var params = ArrayList(Type.Fn.Param).init(comp.gpa()); - var params_consumed = false; - defer if (!params_consumed) { - for (params.span()) |param| { - param.typ.base.deref(comp); - } - params.deinit(); - }; - - { - var it = fn_proto.params.iterator(0); - while (it.next()) |param_node_ptr| { - const param_node = param_node_ptr.*.cast(ast.Node.ParamDecl).?; - const param_type = try comp.analyzeTypeExpr(tree_scope, scope, param_node.type_node); - errdefer param_type.base.deref(comp); - try params.append(Type.Fn.Param{ - .typ = param_type, - .is_noalias = param_node.noalias_token != null, - }); - } - } - - const key = Type.Fn.Key{ - .alignment = null, - .data = Type.Fn.Key.Data{ - .Normal = Type.Fn.Key.Normal{ - .return_type = return_type, - .params = params.toOwnedSlice(), - .is_var_args = false, // TODO - .cc = .Unspecified, // TODO - }, - }, - }; - params_consumed = true; - var key_consumed = false; - defer if (!key_consumed) { - for (key.data.Normal.params) |param| { - param.typ.base.deref(comp); - } - comp.gpa().free(key.data.Normal.params); - }; - - const fn_type = try Type.Fn.get(comp, key); - key_consumed = true; - errdefer fn_type.base.base.deref(comp); - - return fn_type; -} - -fn generateDeclFnProto(comp: *Compilation, fn_decl: *Decl.Fn) !void { - const fn_type = try analyzeFnType( - comp, - fn_decl.base.tree_scope, - fn_decl.base.parent_scope, - fn_decl.fn_proto, - ); - defer fn_type.base.base.deref(comp); - - var symbol_name = try std.ArrayListSentineled(u8, 0).init(comp.gpa(), fn_decl.base.name); - var symbol_name_consumed = false; - defer if (!symbol_name_consumed) symbol_name.deinit(); - - // The Decl.Fn owns the initial 1 reference count - const fn_proto_val = try Value.FnProto.create(comp, fn_type, symbol_name); - fn_decl.value = .{ .FnProto = fn_proto_val }; - symbol_name_consumed = true; -} diff --git a/src-self-hosted/decl.zig b/src-self-hosted/decl.zig deleted file mode 100644 index e68a1458d6..0000000000 --- a/src-self-hosted/decl.zig +++ /dev/null @@ -1,102 +0,0 @@ -const std = @import("std"); -const Allocator = mem.Allocator; -const mem = std.mem; -const ast = std.zig.ast; -const Visib = @import("visib.zig").Visib; -const event = std.event; -const Value = @import("value.zig").Value; -const Token = std.zig.Token; -const errmsg = @import("errmsg.zig"); -const Scope = @import("scope.zig").Scope; -const Compilation = @import("compilation.zig").Compilation; - -pub const Decl = struct { - id: Id, - name: []const u8, - visib: Visib, - resolution: event.Future(Compilation.BuildError!void), - parent_scope: *Scope, - - // TODO when we destroy the decl, deref the 
tree scope - tree_scope: *Scope.AstTree, - - pub const Table = std.StringHashMap(*Decl); - - pub fn cast(base: *Decl, comptime T: type) ?*T { - if (base.id != @field(Id, @typeName(T))) return null; - return @fieldParentPtr(T, "base", base); - } - - pub fn isExported(base: *const Decl, tree: *ast.Tree) bool { - switch (base.id) { - .Fn => { - const fn_decl = @fieldParentPtr(Fn, "base", base); - return fn_decl.isExported(tree); - }, - else => return false, - } - } - - pub fn getSpan(base: *const Decl) errmsg.Span { - switch (base.id) { - .Fn => { - const fn_decl = @fieldParentPtr(Fn, "base", base); - const fn_proto = fn_decl.fn_proto; - const start = fn_proto.fn_token; - const end = fn_proto.name_token orelse start; - return errmsg.Span{ - .first = start, - .last = end + 1, - }; - }, - else => @panic("TODO"), - } - } - - pub fn findRootScope(base: *const Decl) *Scope.Root { - return base.parent_scope.findRoot(); - } - - pub const Id = enum { - Var, - Fn, - CompTime, - }; - - pub const Var = struct { - base: Decl, - }; - - pub const Fn = struct { - base: Decl, - value: union(enum) { - Unresolved, - Fn: *Value.Fn, - FnProto: *Value.FnProto, - }, - fn_proto: *ast.Node.FnProto, - - pub fn externLibName(self: Fn, tree: *ast.Tree) ?[]const u8 { - return if (self.fn_proto.extern_export_inline_token) |tok_index| x: { - const token = tree.tokens.at(tok_index); - break :x switch (token.id) { - .Extern => tree.tokenSlicePtr(token), - else => null, - }; - } else null; - } - - pub fn isExported(self: Fn, tree: *ast.Tree) bool { - if (self.fn_proto.extern_export_inline_token) |tok_index| { - const token = tree.tokens.at(tok_index); - return token.id == .Keyword_export; - } else { - return false; - } - } - }; - - pub const CompTime = struct { - base: Decl, - }; -}; diff --git a/src-self-hosted/errmsg.zig b/src-self-hosted/errmsg.zig deleted file mode 100644 index 5775c1df83..0000000000 --- a/src-self-hosted/errmsg.zig +++ /dev/null @@ -1,284 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const fs = std.fs; -const process = std.process; -const Token = std.zig.Token; -const ast = std.zig.ast; -const TokenIndex = std.zig.ast.TokenIndex; -const Compilation = @import("compilation.zig").Compilation; -const Scope = @import("scope.zig").Scope; - -pub const Color = enum { - Auto, - Off, - On, -}; - -pub const Span = struct { - first: ast.TokenIndex, - last: ast.TokenIndex, - - pub fn token(i: TokenIndex) Span { - return Span{ - .first = i, - .last = i, - }; - } - - pub fn node(n: *ast.Node) Span { - return Span{ - .first = n.firstToken(), - .last = n.lastToken(), - }; - } -}; - -pub const Msg = struct { - text: []u8, - realpath: []u8, - data: Data, - - const Data = union(enum) { - Cli: Cli, - PathAndTree: PathAndTree, - ScopeAndComp: ScopeAndComp, - }; - - const PathAndTree = struct { - span: Span, - tree: *ast.Tree, - allocator: *mem.Allocator, - }; - - const ScopeAndComp = struct { - span: Span, - tree_scope: *Scope.AstTree, - compilation: *Compilation, - }; - - const Cli = struct { - allocator: *mem.Allocator, - }; - - pub fn destroy(self: *Msg) void { - switch (self.data) { - .Cli => |cli| { - cli.allocator.free(self.text); - cli.allocator.free(self.realpath); - cli.allocator.destroy(self); - }, - .PathAndTree => |path_and_tree| { - path_and_tree.allocator.free(self.text); - path_and_tree.allocator.free(self.realpath); - path_and_tree.allocator.destroy(self); - }, - .ScopeAndComp => |scope_and_comp| { - scope_and_comp.tree_scope.base.deref(scope_and_comp.compilation); - 
scope_and_comp.compilation.gpa().free(self.text); - scope_and_comp.compilation.gpa().free(self.realpath); - scope_and_comp.compilation.gpa().destroy(self); - }, - } - } - - fn getAllocator(self: *const Msg) *mem.Allocator { - switch (self.data) { - .Cli => |cli| return cli.allocator, - .PathAndTree => |path_and_tree| { - return path_and_tree.allocator; - }, - .ScopeAndComp => |scope_and_comp| { - return scope_and_comp.compilation.gpa(); - }, - } - } - - pub fn getTree(self: *const Msg) *ast.Tree { - switch (self.data) { - .Cli => unreachable, - .PathAndTree => |path_and_tree| { - return path_and_tree.tree; - }, - .ScopeAndComp => |scope_and_comp| { - return scope_and_comp.tree_scope.tree; - }, - } - } - - pub fn getSpan(self: *const Msg) Span { - return switch (self.data) { - .Cli => unreachable, - .PathAndTree => |path_and_tree| path_and_tree.span, - .ScopeAndComp => |scope_and_comp| scope_and_comp.span, - }; - } - - /// Takes ownership of text - /// References tree_scope, and derefs when the msg is freed - pub fn createFromScope(comp: *Compilation, tree_scope: *Scope.AstTree, span: Span, text: []u8) !*Msg { - const realpath = try mem.dupe(comp.gpa(), u8, tree_scope.root().realpath); - errdefer comp.gpa().free(realpath); - - const msg = try comp.gpa().create(Msg); - msg.* = Msg{ - .text = text, - .realpath = realpath, - .data = Data{ - .ScopeAndComp = ScopeAndComp{ - .tree_scope = tree_scope, - .compilation = comp, - .span = span, - }, - }, - }; - tree_scope.base.ref(); - return msg; - } - - /// Caller owns returned Msg and must free with `allocator` - /// allocator will additionally be used for printing messages later. - pub fn createFromCli(comp: *Compilation, realpath: []const u8, text: []u8) !*Msg { - const realpath_copy = try mem.dupe(comp.gpa(), u8, realpath); - errdefer comp.gpa().free(realpath_copy); - - const msg = try comp.gpa().create(Msg); - msg.* = Msg{ - .text = text, - .realpath = realpath_copy, - .data = Data{ - .Cli = Cli{ .allocator = comp.gpa() }, - }, - }; - return msg; - } - - pub fn createFromParseErrorAndScope( - comp: *Compilation, - tree_scope: *Scope.AstTree, - parse_error: *const ast.Error, - ) !*Msg { - const loc_token = parse_error.loc(); - var text_buf = std.ArrayList(u8).init(comp.gpa()); - defer text_buf.deinit(); - - const realpath_copy = try mem.dupe(comp.gpa(), u8, tree_scope.root().realpath); - errdefer comp.gpa().free(realpath_copy); - - try parse_error.render(&tree_scope.tree.tokens, text_buf.outStream()); - - const msg = try comp.gpa().create(Msg); - msg.* = Msg{ - .text = undefined, - .realpath = realpath_copy, - .data = Data{ - .ScopeAndComp = ScopeAndComp{ - .tree_scope = tree_scope, - .compilation = comp, - .span = Span{ - .first = loc_token, - .last = loc_token, - }, - }, - }, - }; - tree_scope.base.ref(); - msg.text = text_buf.toOwnedSlice(); - return msg; - } - - /// `realpath` must outlive the returned Msg - /// `tree` must outlive the returned Msg - /// Caller owns returned Msg and must free with `allocator` - /// allocator will additionally be used for printing messages later. 
- pub fn createFromParseError( - allocator: *mem.Allocator, - parse_error: *const ast.Error, - tree: *ast.Tree, - realpath: []const u8, - ) !*Msg { - const loc_token = parse_error.loc(); - var text_buf = std.ArrayList(u8).init(allocator); - defer text_buf.deinit(); - - const realpath_copy = try mem.dupe(allocator, u8, realpath); - errdefer allocator.free(realpath_copy); - - try parse_error.render(&tree.tokens, text_buf.outStream()); - - const msg = try allocator.create(Msg); - msg.* = Msg{ - .text = undefined, - .realpath = realpath_copy, - .data = Data{ - .PathAndTree = PathAndTree{ - .allocator = allocator, - .tree = tree, - .span = Span{ - .first = loc_token, - .last = loc_token, - }, - }, - }, - }; - msg.text = text_buf.toOwnedSlice(); - errdefer allocator.destroy(msg); - - return msg; - } - - pub fn printToStream(msg: *const Msg, stream: var, color_on: bool) !void { - switch (msg.data) { - .Cli => { - try stream.print("{}:-:-: error: {}\n", .{ msg.realpath, msg.text }); - return; - }, - else => {}, - } - - const allocator = msg.getAllocator(); - const tree = msg.getTree(); - - const cwd = try process.getCwdAlloc(allocator); - defer allocator.free(cwd); - - const relpath = try fs.path.relative(allocator, cwd, msg.realpath); - defer allocator.free(relpath); - - const path = if (relpath.len < msg.realpath.len) relpath else msg.realpath; - const span = msg.getSpan(); - - const first_token = tree.tokens.at(span.first); - const last_token = tree.tokens.at(span.last); - const start_loc = tree.tokenLocationPtr(0, first_token); - const end_loc = tree.tokenLocationPtr(first_token.end, last_token); - if (!color_on) { - try stream.print("{}:{}:{}: error: {}\n", .{ - path, - start_loc.line + 1, - start_loc.column + 1, - msg.text, - }); - return; - } - - try stream.print("{}:{}:{}: error: {}\n{}\n", .{ - path, - start_loc.line + 1, - start_loc.column + 1, - msg.text, - tree.source[start_loc.line_start..start_loc.line_end], - }); - try stream.writeByteNTimes(' ', start_loc.column); - try stream.writeByteNTimes('~', last_token.end - first_token.start); - try stream.writeAll("\n"); - } - - pub fn printToFile(msg: *const Msg, file: fs.File, color: Color) !void { - const color_on = switch (color) { - .Auto => file.isTty(), - .On => true, - .Off => false, - }; - return msg.printToStream(file.outStream(), color_on); - } -}; diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 6e58236ca8..330b1c4135 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -1,16 +1,9 @@ const std = @import("std"); -const mem = std.mem; -const Allocator = std.mem.Allocator; const Value = @import("value.zig").Value; const Type = @import("type.zig").Type; -const assert = std.debug.assert; -const BigIntConst = std.math.big.int.Const; -const BigIntMutable = std.math.big.int.Mutable; -const Target = std.Target; +const Module = @import("Module.zig"); -pub const text = @import("ir/text.zig"); - -/// These are in-memory, analyzed instructions. See `text.Inst` for the representation +/// These are in-memory, analyzed instructions. See `zir.Inst` for the representation /// of instructions that correspond to the ZIR text format. /// This struct owns the `Value` and `Type` memory. When the struct is deallocated, /// so are the `Value` and `Type`. 
The value of a constant must be copied into @@ -25,6 +18,7 @@ pub const Inst = struct { assembly, bitcast, breakpoint, + call, cmp, condbr, constant, @@ -84,6 +78,15 @@ pub const Inst = struct { args: void, }; + pub const Call = struct { + pub const base_tag = Tag.call; + base: Inst, + args: struct { + func: *Inst, + args: []const *Inst, + }, + }; + pub const Cmp = struct { pub const base_tag = Tag.cmp; @@ -152,1200 +155,3 @@ pub const Inst = struct { args: void, }; }; - -pub const TypedValue = struct { - ty: Type, - val: Value, -}; - -pub const Module = struct { - exports: []Export, - errors: []ErrorMsg, - arena: std.heap.ArenaAllocator, - fns: []Fn, - target: Target, - link_mode: std.builtin.LinkMode, - output_mode: std.builtin.OutputMode, - object_format: std.Target.ObjectFormat, - optimize_mode: std.builtin.Mode, - - pub const Export = struct { - name: []const u8, - typed_value: TypedValue, - src: usize, - }; - - pub const Fn = struct { - analysis_status: enum { in_progress, failure, success }, - body: Body, - fn_type: Type, - }; - - pub const Body = struct { - instructions: []*Inst, - }; - - pub fn deinit(self: *Module, allocator: *Allocator) void { - allocator.free(self.exports); - allocator.free(self.errors); - for (self.fns) |f| { - allocator.free(f.body.instructions); - } - allocator.free(self.fns); - self.arena.deinit(); - self.* = undefined; - } -}; - -pub const ErrorMsg = struct { - byte_offset: usize, - msg: []const u8, -}; - -pub const AnalyzeOptions = struct { - target: Target, - output_mode: std.builtin.OutputMode, - link_mode: std.builtin.LinkMode, - object_format: ?std.Target.ObjectFormat = null, - optimize_mode: std.builtin.Mode, -}; - -pub fn analyze(allocator: *Allocator, old_module: text.Module, options: AnalyzeOptions) !Module { - var ctx = Analyze{ - .allocator = allocator, - .arena = std.heap.ArenaAllocator.init(allocator), - .old_module = &old_module, - .errors = std.ArrayList(ErrorMsg).init(allocator), - .decl_table = std.AutoHashMap(*text.Inst, Analyze.NewDecl).init(allocator), - .exports = std.ArrayList(Module.Export).init(allocator), - .fns = std.ArrayList(Module.Fn).init(allocator), - .target = options.target, - .optimize_mode = options.optimize_mode, - .link_mode = options.link_mode, - .output_mode = options.output_mode, - }; - defer ctx.errors.deinit(); - defer ctx.decl_table.deinit(); - defer ctx.exports.deinit(); - defer ctx.fns.deinit(); - errdefer ctx.arena.deinit(); - - ctx.analyzeRoot() catch |err| switch (err) { - error.AnalysisFail => { - assert(ctx.errors.items.len != 0); - }, - else => |e| return e, - }; - return Module{ - .exports = ctx.exports.toOwnedSlice(), - .errors = ctx.errors.toOwnedSlice(), - .fns = ctx.fns.toOwnedSlice(), - .arena = ctx.arena, - .target = ctx.target, - .link_mode = ctx.link_mode, - .output_mode = ctx.output_mode, - .object_format = options.object_format orelse ctx.target.getObjectFormat(), - .optimize_mode = ctx.optimize_mode, - }; -} - -const Analyze = struct { - allocator: *Allocator, - arena: std.heap.ArenaAllocator, - old_module: *const text.Module, - errors: std.ArrayList(ErrorMsg), - decl_table: std.AutoHashMap(*text.Inst, NewDecl), - exports: std.ArrayList(Module.Export), - fns: std.ArrayList(Module.Fn), - target: Target, - link_mode: std.builtin.LinkMode, - optimize_mode: std.builtin.Mode, - output_mode: std.builtin.OutputMode, - - const NewDecl = struct { - /// null means a semantic analysis error happened - ptr: ?*Inst, - }; - - const NewInst = struct { - /// null means a semantic analysis error happened - ptr: 
?*Inst, - }; - - const Fn = struct { - /// Index into Module fns array - fn_index: usize, - inner_block: Block, - inst_table: std.AutoHashMap(*text.Inst, NewInst), - }; - - const Block = struct { - func: *Fn, - instructions: std.ArrayList(*Inst), - }; - - const InnerError = error{ OutOfMemory, AnalysisFail }; - - fn analyzeRoot(self: *Analyze) !void { - for (self.old_module.decls) |decl| { - if (decl.cast(text.Inst.Export)) |export_inst| { - try analyzeExport(self, null, export_inst); - } - } - } - - fn resolveInst(self: *Analyze, opt_block: ?*Block, old_inst: *text.Inst) InnerError!*Inst { - if (opt_block) |block| { - if (block.func.inst_table.get(old_inst)) |kv| { - return kv.value.ptr orelse return error.AnalysisFail; - } - } - - if (self.decl_table.get(old_inst)) |kv| { - return kv.value.ptr orelse return error.AnalysisFail; - } else { - const new_inst = self.analyzeInst(null, old_inst) catch |err| switch (err) { - error.AnalysisFail => { - try self.decl_table.putNoClobber(old_inst, .{ .ptr = null }); - return error.AnalysisFail; - }, - else => |e| return e, - }; - try self.decl_table.putNoClobber(old_inst, .{ .ptr = new_inst }); - return new_inst; - } - } - - fn requireRuntimeBlock(self: *Analyze, block: ?*Block, src: usize) !*Block { - return block orelse return self.fail(src, "instruction illegal outside function body", .{}); - } - - fn resolveInstConst(self: *Analyze, block: ?*Block, old_inst: *text.Inst) InnerError!TypedValue { - const new_inst = try self.resolveInst(block, old_inst); - const val = try self.resolveConstValue(new_inst); - return TypedValue{ - .ty = new_inst.ty, - .val = val, - }; - } - - fn resolveConstValue(self: *Analyze, base: *Inst) !Value { - return (try self.resolveDefinedValue(base)) orelse - return self.fail(base.src, "unable to resolve comptime value", .{}); - } - - fn resolveDefinedValue(self: *Analyze, base: *Inst) !?Value { - if (base.value()) |val| { - if (val.isUndef()) { - return self.fail(base.src, "use of undefined value here causes undefined behavior", .{}); - } - return val; - } - return null; - } - - fn resolveConstString(self: *Analyze, block: ?*Block, old_inst: *text.Inst) ![]u8 { - const new_inst = try self.resolveInst(block, old_inst); - const wanted_type = Type.initTag(.const_slice_u8); - const coerced_inst = try self.coerce(block, wanted_type, new_inst); - const val = try self.resolveConstValue(coerced_inst); - return val.toAllocatedBytes(&self.arena.allocator); - } - - fn resolveType(self: *Analyze, block: ?*Block, old_inst: *text.Inst) !Type { - const new_inst = try self.resolveInst(block, old_inst); - const wanted_type = Type.initTag(.@"type"); - const coerced_inst = try self.coerce(block, wanted_type, new_inst); - const val = try self.resolveConstValue(coerced_inst); - return val.toType(); - } - - fn analyzeExport(self: *Analyze, block: ?*Block, export_inst: *text.Inst.Export) !void { - const symbol_name = try self.resolveConstString(block, export_inst.positionals.symbol_name); - const typed_value = try self.resolveInstConst(block, export_inst.positionals.value); - - switch (typed_value.ty.zigTypeTag()) { - .Fn => {}, - else => return self.fail( - export_inst.positionals.value.src, - "unable to export type '{}'", - .{typed_value.ty}, - ), - } - try self.exports.append(.{ - .name = symbol_name, - .typed_value = typed_value, - .src = export_inst.base.src, - }); - } - - /// TODO should not need the cast on the last parameter at the callsites - fn addNewInstArgs( - self: *Analyze, - block: *Block, - src: usize, - ty: Type, - comptime T: 
type, - args: Inst.Args(T), - ) !*Inst { - const inst = try self.addNewInst(block, src, ty, T); - inst.args = args; - return &inst.base; - } - - fn addNewInst(self: *Analyze, block: *Block, src: usize, ty: Type, comptime T: type) !*T { - const inst = try self.arena.allocator.create(T); - inst.* = .{ - .base = .{ - .tag = T.base_tag, - .ty = ty, - .src = src, - }, - .args = undefined, - }; - try block.instructions.append(&inst.base); - return inst; - } - - fn constInst(self: *Analyze, src: usize, typed_value: TypedValue) !*Inst { - const const_inst = try self.arena.allocator.create(Inst.Constant); - const_inst.* = .{ - .base = .{ - .tag = Inst.Constant.base_tag, - .ty = typed_value.ty, - .src = src, - }, - .val = typed_value.val, - }; - return &const_inst.base; - } - - fn constStr(self: *Analyze, src: usize, str: []const u8) !*Inst { - const array_payload = try self.arena.allocator.create(Type.Payload.Array_u8_Sentinel0); - array_payload.* = .{ .len = str.len }; - - const ty_payload = try self.arena.allocator.create(Type.Payload.SingleConstPointer); - ty_payload.* = .{ .pointee_type = Type.initPayload(&array_payload.base) }; - - const bytes_payload = try self.arena.allocator.create(Value.Payload.Bytes); - bytes_payload.* = .{ .data = str }; - - return self.constInst(src, .{ - .ty = Type.initPayload(&ty_payload.base), - .val = Value.initPayload(&bytes_payload.base), - }); - } - - fn constType(self: *Analyze, src: usize, ty: Type) !*Inst { - return self.constInst(src, .{ - .ty = Type.initTag(.type), - .val = try ty.toValue(&self.arena.allocator), - }); - } - - fn constVoid(self: *Analyze, src: usize) !*Inst { - return self.constInst(src, .{ - .ty = Type.initTag(.void), - .val = Value.initTag(.the_one_possible_value), - }); - } - - fn constUndef(self: *Analyze, src: usize, ty: Type) !*Inst { - return self.constInst(src, .{ - .ty = ty, - .val = Value.initTag(.undef), - }); - } - - fn constBool(self: *Analyze, src: usize, v: bool) !*Inst { - return self.constInst(src, .{ - .ty = Type.initTag(.bool), - .val = ([2]Value{ Value.initTag(.bool_false), Value.initTag(.bool_true) })[@boolToInt(v)], - }); - } - - fn constIntUnsigned(self: *Analyze, src: usize, ty: Type, int: u64) !*Inst { - const int_payload = try self.arena.allocator.create(Value.Payload.Int_u64); - int_payload.* = .{ .int = int }; - - return self.constInst(src, .{ - .ty = ty, - .val = Value.initPayload(&int_payload.base), - }); - } - - fn constIntSigned(self: *Analyze, src: usize, ty: Type, int: i64) !*Inst { - const int_payload = try self.arena.allocator.create(Value.Payload.Int_i64); - int_payload.* = .{ .int = int }; - - return self.constInst(src, .{ - .ty = ty, - .val = Value.initPayload(&int_payload.base), - }); - } - - fn constIntBig(self: *Analyze, src: usize, ty: Type, big_int: BigIntConst) !*Inst { - const val_payload = if (big_int.positive) blk: { - if (big_int.to(u64)) |x| { - return self.constIntUnsigned(src, ty, x); - } else |err| switch (err) { - error.NegativeIntoUnsigned => unreachable, - error.TargetTooSmall => {}, // handled below - } - const big_int_payload = try self.arena.allocator.create(Value.Payload.IntBigPositive); - big_int_payload.* = .{ .limbs = big_int.limbs }; - break :blk &big_int_payload.base; - } else blk: { - if (big_int.to(i64)) |x| { - return self.constIntSigned(src, ty, x); - } else |err| switch (err) { - error.NegativeIntoUnsigned => unreachable, - error.TargetTooSmall => {}, // handled below - } - const big_int_payload = try self.arena.allocator.create(Value.Payload.IntBigNegative); - 
big_int_payload.* = .{ .limbs = big_int.limbs }; - break :blk &big_int_payload.base; - }; - - return self.constInst(src, .{ - .ty = ty, - .val = Value.initPayload(val_payload), - }); - } - - fn analyzeInst(self: *Analyze, block: ?*Block, old_inst: *text.Inst) InnerError!*Inst { - switch (old_inst.tag) { - .breakpoint => return self.analyzeInstBreakpoint(block, old_inst.cast(text.Inst.Breakpoint).?), - .str => { - // We can use this reference because Inst.Const's Value is arena-allocated. - // The value would get copied to a MemoryCell before the `text.Inst.Str` lifetime ends. - const bytes = old_inst.cast(text.Inst.Str).?.positionals.bytes; - return self.constStr(old_inst.src, bytes); - }, - .int => { - const big_int = old_inst.cast(text.Inst.Int).?.positionals.int; - return self.constIntBig(old_inst.src, Type.initTag(.comptime_int), big_int); - }, - .ptrtoint => return self.analyzeInstPtrToInt(block, old_inst.cast(text.Inst.PtrToInt).?), - .fieldptr => return self.analyzeInstFieldPtr(block, old_inst.cast(text.Inst.FieldPtr).?), - .deref => return self.analyzeInstDeref(block, old_inst.cast(text.Inst.Deref).?), - .as => return self.analyzeInstAs(block, old_inst.cast(text.Inst.As).?), - .@"asm" => return self.analyzeInstAsm(block, old_inst.cast(text.Inst.Asm).?), - .@"unreachable" => return self.analyzeInstUnreachable(block, old_inst.cast(text.Inst.Unreachable).?), - .@"return" => return self.analyzeInstRet(block, old_inst.cast(text.Inst.Return).?), - .@"fn" => return self.analyzeInstFn(block, old_inst.cast(text.Inst.Fn).?), - .@"export" => { - try self.analyzeExport(block, old_inst.cast(text.Inst.Export).?); - return self.constVoid(old_inst.src); - }, - .primitive => return self.analyzeInstPrimitive(old_inst.cast(text.Inst.Primitive).?), - .fntype => return self.analyzeInstFnType(block, old_inst.cast(text.Inst.FnType).?), - .intcast => return self.analyzeInstIntCast(block, old_inst.cast(text.Inst.IntCast).?), - .bitcast => return self.analyzeInstBitCast(block, old_inst.cast(text.Inst.BitCast).?), - .elemptr => return self.analyzeInstElemPtr(block, old_inst.cast(text.Inst.ElemPtr).?), - .add => return self.analyzeInstAdd(block, old_inst.cast(text.Inst.Add).?), - .cmp => return self.analyzeInstCmp(block, old_inst.cast(text.Inst.Cmp).?), - .condbr => return self.analyzeInstCondBr(block, old_inst.cast(text.Inst.CondBr).?), - .isnull => return self.analyzeInstIsNull(block, old_inst.cast(text.Inst.IsNull).?), - .isnonnull => return self.analyzeInstIsNonNull(block, old_inst.cast(text.Inst.IsNonNull).?), - } - } - - fn analyzeInstBreakpoint(self: *Analyze, block: ?*Block, inst: *text.Inst.Breakpoint) InnerError!*Inst { - const b = try self.requireRuntimeBlock(block, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, Inst.Args(Inst.Breakpoint){}); - } - - fn analyzeInstFn(self: *Analyze, block: ?*Block, fn_inst: *text.Inst.Fn) InnerError!*Inst { - const fn_type = try self.resolveType(block, fn_inst.positionals.fn_type); - - var new_func: Fn = .{ - .fn_index = self.fns.items.len, - .inner_block = .{ - .func = undefined, - .instructions = std.ArrayList(*Inst).init(self.allocator), - }, - .inst_table = std.AutoHashMap(*text.Inst, NewInst).init(self.allocator), - }; - new_func.inner_block.func = &new_func; - defer new_func.inner_block.instructions.deinit(); - defer new_func.inst_table.deinit(); - // Don't hang on to a reference to this when analyzing body instructions, since the memory - // could become invalid. 
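The warning in the comment above is the usual ArrayList pitfall: appending to `self.fns` while function bodies are being analyzed may reallocate the backing array and move every element, which is why the code below re-indexes `self.fns.items[new_func.fn_index]` after analysis instead of caching a pointer across the call. A self-contained illustration (the names here are not from the compiler):

```
const std = @import("std");

pub fn main() !void {
    var list = std.ArrayList(u32).init(std.heap.page_allocator);
    defer list.deinit();

    try list.append(42);
    const unstable = &list.items[0]; // points into the current allocation

    var i: u32 = 0;
    while (i < 1000) : (i += 1) {
        try list.append(i); // any of these appends may reallocate
    }

    // `unstable` may now dangle; re-derive the pointer from the list instead.
    const stable = &list.items[0];
    std.debug.warn("{}\n", .{stable.*});
}
```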
- (try self.fns.addOne()).* = .{ - .analysis_status = .in_progress, - .fn_type = fn_type, - .body = undefined, - }; - - try self.analyzeBody(&new_func.inner_block, fn_inst.positionals.body); - - const f = &self.fns.items[new_func.fn_index]; - f.analysis_status = .success; - f.body = .{ .instructions = new_func.inner_block.instructions.toOwnedSlice() }; - - const fn_payload = try self.arena.allocator.create(Value.Payload.Function); - fn_payload.* = .{ .index = new_func.fn_index }; - - return self.constInst(fn_inst.base.src, .{ - .ty = fn_type, - .val = Value.initPayload(&fn_payload.base), - }); - } - - fn analyzeInstFnType(self: *Analyze, block: ?*Block, fntype: *text.Inst.FnType) InnerError!*Inst { - const return_type = try self.resolveType(block, fntype.positionals.return_type); - - if (return_type.zigTypeTag() == .NoReturn and - fntype.positionals.param_types.len == 0 and - fntype.kw_args.cc == .Naked) - { - return self.constType(fntype.base.src, Type.initTag(.fn_naked_noreturn_no_args)); - } - - if (return_type.zigTypeTag() == .Void and - fntype.positionals.param_types.len == 0 and - fntype.kw_args.cc == .C) - { - return self.constType(fntype.base.src, Type.initTag(.fn_ccc_void_no_args)); - } - - return self.fail(fntype.base.src, "TODO implement fntype instruction more", .{}); - } - - fn analyzeInstPrimitive(self: *Analyze, primitive: *text.Inst.Primitive) InnerError!*Inst { - return self.constType(primitive.base.src, primitive.positionals.tag.toType()); - } - - fn analyzeInstAs(self: *Analyze, block: ?*Block, as: *text.Inst.As) InnerError!*Inst { - const dest_type = try self.resolveType(block, as.positionals.dest_type); - const new_inst = try self.resolveInst(block, as.positionals.value); - return self.coerce(block, dest_type, new_inst); - } - - fn analyzeInstPtrToInt(self: *Analyze, block: ?*Block, ptrtoint: *text.Inst.PtrToInt) InnerError!*Inst { - const ptr = try self.resolveInst(block, ptrtoint.positionals.ptr); - if (ptr.ty.zigTypeTag() != .Pointer) { - return self.fail(ptrtoint.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}); - } - // TODO handle known-pointer-address - const b = try self.requireRuntimeBlock(block, ptrtoint.base.src); - const ty = Type.initTag(.usize); - return self.addNewInstArgs(b, ptrtoint.base.src, ty, Inst.PtrToInt, Inst.Args(Inst.PtrToInt){ .ptr = ptr }); - } - - fn analyzeInstFieldPtr(self: *Analyze, block: ?*Block, fieldptr: *text.Inst.FieldPtr) InnerError!*Inst { - const object_ptr = try self.resolveInst(block, fieldptr.positionals.object_ptr); - const field_name = try self.resolveConstString(block, fieldptr.positionals.field_name); - - const elem_ty = switch (object_ptr.ty.zigTypeTag()) { - .Pointer => object_ptr.ty.elemType(), - else => return self.fail(fieldptr.positionals.object_ptr.src, "expected pointer, found '{}'", .{object_ptr.ty}), - }; - switch (elem_ty.zigTypeTag()) { - .Array => { - if (mem.eql(u8, field_name, "len")) { - const len_payload = try self.arena.allocator.create(Value.Payload.Int_u64); - len_payload.* = .{ .int = elem_ty.arrayLen() }; - - const ref_payload = try self.arena.allocator.create(Value.Payload.RefVal); - ref_payload.* = .{ .val = Value.initPayload(&len_payload.base) }; - - return self.constInst(fieldptr.base.src, .{ - .ty = Type.initTag(.single_const_pointer_to_comptime_int), - .val = Value.initPayload(&ref_payload.base), - }); - } else { - return self.fail( - fieldptr.positionals.field_name.src, - "no member named '{}' in '{}'", - .{ field_name, elem_ty }, - ); - } - }, - else => return 
self.fail(fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}), - } - } - - fn analyzeInstIntCast(self: *Analyze, block: ?*Block, intcast: *text.Inst.IntCast) InnerError!*Inst { - const dest_type = try self.resolveType(block, intcast.positionals.dest_type); - const new_inst = try self.resolveInst(block, intcast.positionals.value); - - const dest_is_comptime_int = switch (dest_type.zigTypeTag()) { - .ComptimeInt => true, - .Int => false, - else => return self.fail( - intcast.positionals.dest_type.src, - "expected integer type, found '{}'", - .{ - dest_type, - }, - ), - }; - - switch (new_inst.ty.zigTypeTag()) { - .ComptimeInt, .Int => {}, - else => return self.fail( - intcast.positionals.value.src, - "expected integer type, found '{}'", - .{new_inst.ty}, - ), - } - - if (dest_is_comptime_int or new_inst.value() != null) { - return self.coerce(block, dest_type, new_inst); - } - - return self.fail(intcast.base.src, "TODO implement analyze widen or shorten int", .{}); - } - - fn analyzeInstBitCast(self: *Analyze, block: ?*Block, inst: *text.Inst.BitCast) InnerError!*Inst { - const dest_type = try self.resolveType(block, inst.positionals.dest_type); - const operand = try self.resolveInst(block, inst.positionals.operand); - return self.bitcast(block, dest_type, operand); - } - - fn analyzeInstElemPtr(self: *Analyze, block: ?*Block, inst: *text.Inst.ElemPtr) InnerError!*Inst { - const array_ptr = try self.resolveInst(block, inst.positionals.array_ptr); - const uncasted_index = try self.resolveInst(block, inst.positionals.index); - const elem_index = try self.coerce(block, Type.initTag(.usize), uncasted_index); - - if (array_ptr.ty.isSinglePointer() and array_ptr.ty.elemType().zigTypeTag() == .Array) { - if (array_ptr.value()) |array_ptr_val| { - if (elem_index.value()) |index_val| { - // Both array pointer and index are compile-time known. - const index_u64 = index_val.toUnsignedInt(); - // @intCast here because it would have been impossible to construct a value that - // required a larger index. - const elem_val = try array_ptr_val.elemValueAt(&self.arena.allocator, @intCast(usize, index_u64)); - - const ref_payload = try self.arena.allocator.create(Value.Payload.RefVal); - ref_payload.* = .{ .val = elem_val }; - - const type_payload = try self.arena.allocator.create(Type.Payload.SingleConstPointer); - type_payload.* = .{ .pointee_type = array_ptr.ty.elemType().elemType() }; - - return self.constInst(inst.base.src, .{ - .ty = Type.initPayload(&type_payload.base), - .val = Value.initPayload(&ref_payload.base), - }); - } - } - } - - return self.fail(inst.base.src, "TODO implement more analyze elemptr", .{}); - } - - fn analyzeInstAdd(self: *Analyze, block: ?*Block, inst: *text.Inst.Add) InnerError!*Inst { - const lhs = try self.resolveInst(block, inst.positionals.lhs); - const rhs = try self.resolveInst(block, inst.positionals.rhs); - - if (lhs.ty.zigTypeTag() == .Int and rhs.ty.zigTypeTag() == .Int) { - if (lhs.value()) |lhs_val| { - if (rhs.value()) |rhs_val| { - // TODO is this a performance issue? maybe we should try the operation without - // resorting to BigInt first. 
- var lhs_space: Value.BigIntSpace = undefined; - var rhs_space: Value.BigIntSpace = undefined; - const lhs_bigint = lhs_val.toBigInt(&lhs_space); - const rhs_bigint = rhs_val.toBigInt(&rhs_space); - const limbs = try self.arena.allocator.alloc( - std.math.big.Limb, - std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len) + 1, - ); - var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined }; - result_bigint.add(lhs_bigint, rhs_bigint); - const result_limbs = result_bigint.limbs[0..result_bigint.len]; - - if (!lhs.ty.eql(rhs.ty)) { - return self.fail(inst.base.src, "TODO implement peer type resolution", .{}); - } - - const val_payload = if (result_bigint.positive) blk: { - const val_payload = try self.arena.allocator.create(Value.Payload.IntBigPositive); - val_payload.* = .{ .limbs = result_limbs }; - break :blk &val_payload.base; - } else blk: { - const val_payload = try self.arena.allocator.create(Value.Payload.IntBigNegative); - val_payload.* = .{ .limbs = result_limbs }; - break :blk &val_payload.base; - }; - - return self.constInst(inst.base.src, .{ - .ty = lhs.ty, - .val = Value.initPayload(val_payload), - }); - } - } - } - - return self.fail(inst.base.src, "TODO implement more analyze add", .{}); - } - - fn analyzeInstDeref(self: *Analyze, block: ?*Block, deref: *text.Inst.Deref) InnerError!*Inst { - const ptr = try self.resolveInst(block, deref.positionals.ptr); - const elem_ty = switch (ptr.ty.zigTypeTag()) { - .Pointer => ptr.ty.elemType(), - else => return self.fail(deref.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}), - }; - if (ptr.value()) |val| { - return self.constInst(deref.base.src, .{ - .ty = elem_ty, - .val = val.pointerDeref(), - }); - } - - return self.fail(deref.base.src, "TODO implement runtime deref", .{}); - } - - fn analyzeInstAsm(self: *Analyze, block: ?*Block, assembly: *text.Inst.Asm) InnerError!*Inst { - const return_type = try self.resolveType(block, assembly.positionals.return_type); - const asm_source = try self.resolveConstString(block, assembly.positionals.asm_source); - const output = if (assembly.kw_args.output) |o| try self.resolveConstString(block, o) else null; - - const inputs = try self.arena.allocator.alloc([]const u8, assembly.kw_args.inputs.len); - const clobbers = try self.arena.allocator.alloc([]const u8, assembly.kw_args.clobbers.len); - const args = try self.arena.allocator.alloc(*Inst, assembly.kw_args.args.len); - - for (inputs) |*elem, i| { - elem.* = try self.resolveConstString(block, assembly.kw_args.inputs[i]); - } - for (clobbers) |*elem, i| { - elem.* = try self.resolveConstString(block, assembly.kw_args.clobbers[i]); - } - for (args) |*elem, i| { - const arg = try self.resolveInst(block, assembly.kw_args.args[i]); - elem.* = try self.coerce(block, Type.initTag(.usize), arg); - } - - const b = try self.requireRuntimeBlock(block, assembly.base.src); - return self.addNewInstArgs(b, assembly.base.src, return_type, Inst.Assembly, Inst.Args(Inst.Assembly){ - .asm_source = asm_source, - .is_volatile = assembly.kw_args.@"volatile", - .output = output, - .inputs = inputs, - .clobbers = clobbers, - .args = args, - }); - } - - fn analyzeInstCmp(self: *Analyze, block: ?*Block, inst: *text.Inst.Cmp) InnerError!*Inst { - const lhs = try self.resolveInst(block, inst.positionals.lhs); - const rhs = try self.resolveInst(block, inst.positionals.rhs); - const op = inst.positionals.op; - - const is_equality_cmp = switch (op) { - .eq, .neq => true, - else => false, - }; - const lhs_ty_tag = 
lhs.ty.zigTypeTag(); - const rhs_ty_tag = rhs.ty.zigTypeTag(); - if (is_equality_cmp and lhs_ty_tag == .Null and rhs_ty_tag == .Null) { - // null == null, null != null - return self.constBool(inst.base.src, op == .eq); - } else if (is_equality_cmp and - ((lhs_ty_tag == .Null and rhs_ty_tag == .Optional) or - rhs_ty_tag == .Null and lhs_ty_tag == .Optional)) - { - // comparing null with optionals - const opt_operand = if (lhs_ty_tag == .Optional) lhs else rhs; - if (opt_operand.value()) |opt_val| { - const is_null = opt_val.isNull(); - return self.constBool(inst.base.src, if (op == .eq) is_null else !is_null); - } - const b = try self.requireRuntimeBlock(block, inst.base.src); - switch (op) { - .eq => return self.addNewInstArgs( - b, - inst.base.src, - Type.initTag(.bool), - Inst.IsNull, - Inst.Args(Inst.IsNull){ .operand = opt_operand }, - ), - .neq => return self.addNewInstArgs( - b, - inst.base.src, - Type.initTag(.bool), - Inst.IsNonNull, - Inst.Args(Inst.IsNonNull){ .operand = opt_operand }, - ), - else => unreachable, - } - } else if (is_equality_cmp and - ((lhs_ty_tag == .Null and rhs.ty.isCPtr()) or (rhs_ty_tag == .Null and lhs.ty.isCPtr()))) - { - return self.fail(inst.base.src, "TODO implement C pointer cmp", .{}); - } else if (lhs_ty_tag == .Null or rhs_ty_tag == .Null) { - const non_null_type = if (lhs_ty_tag == .Null) rhs.ty else lhs.ty; - return self.fail(inst.base.src, "comparison of '{}' with null", .{non_null_type}); - } else if (is_equality_cmp and - ((lhs_ty_tag == .EnumLiteral and rhs_ty_tag == .Union) or - (rhs_ty_tag == .EnumLiteral and lhs_ty_tag == .Union))) - { - return self.fail(inst.base.src, "TODO implement equality comparison between a union's tag value and an enum literal", .{}); - } else if (lhs_ty_tag == .ErrorSet and rhs_ty_tag == .ErrorSet) { - if (!is_equality_cmp) { - return self.fail(inst.base.src, "{} operator not allowed for errors", .{@tagName(op)}); - } - return self.fail(inst.base.src, "TODO implement equality comparison between errors", .{}); - } else if (lhs.ty.isNumeric() and rhs.ty.isNumeric()) { - // This operation allows any combination of integer and float types, regardless of the - // signed-ness, comptime-ness, and bit-width. So peer type resolution is incorrect for - // numeric types. 
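A concrete case of why ordinary peer type resolution would be wrong here (the values are hypothetical, not from the diff): comparing a `u8` holding 200 against an `i8` holding -1 has a well-defined answer, yet neither `u8` nor `i8` can represent both operands. `cmpNumeric` below instead sizes a signed integer wide enough for both sides, which for unsigned 8 bits means 8 + 1 = 9 bits:

```
const std = @import("std");

test "mixed sign comparison needs a wider type" {
    const a: u8 = 200;
    const b: i8 = -1;
    // i9 is the narrowest signed type that holds all of u8 and all of i8,
    // mirroring the "+ 1 bit" logic in cmpNumeric.
    const wide_a = @as(i9, a);
    const wide_b = @as(i9, b);
    std.testing.expect(wide_a > wide_b);
}
```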
- return self.cmpNumeric(block, inst.base.src, lhs, rhs, op); - } - return self.fail(inst.base.src, "TODO implement more cmp analysis", .{}); - } - - fn analyzeInstIsNull(self: *Analyze, block: ?*Block, inst: *text.Inst.IsNull) InnerError!*Inst { - const operand = try self.resolveInst(block, inst.positionals.operand); - return self.analyzeIsNull(block, inst.base.src, operand, true); - } - - fn analyzeInstIsNonNull(self: *Analyze, block: ?*Block, inst: *text.Inst.IsNonNull) InnerError!*Inst { - const operand = try self.resolveInst(block, inst.positionals.operand); - return self.analyzeIsNull(block, inst.base.src, operand, false); - } - - fn analyzeInstCondBr(self: *Analyze, block: ?*Block, inst: *text.Inst.CondBr) InnerError!*Inst { - const uncasted_cond = try self.resolveInst(block, inst.positionals.condition); - const cond = try self.coerce(block, Type.initTag(.bool), uncasted_cond); - - if (try self.resolveDefinedValue(cond)) |cond_val| { - const body = if (cond_val.toBool()) &inst.positionals.true_body else &inst.positionals.false_body; - try self.analyzeBody(block, body.*); - return self.constVoid(inst.base.src); - } - - const parent_block = try self.requireRuntimeBlock(block, inst.base.src); - - var true_block: Block = .{ - .func = parent_block.func, - .instructions = std.ArrayList(*Inst).init(self.allocator), - }; - defer true_block.instructions.deinit(); - try self.analyzeBody(&true_block, inst.positionals.true_body); - - var false_block: Block = .{ - .func = parent_block.func, - .instructions = std.ArrayList(*Inst).init(self.allocator), - }; - defer false_block.instructions.deinit(); - try self.analyzeBody(&false_block, inst.positionals.false_body); - - // Copy the instruction pointers to the arena memory - const true_instructions = try self.arena.allocator.alloc(*Inst, true_block.instructions.items.len); - const false_instructions = try self.arena.allocator.alloc(*Inst, false_block.instructions.items.len); - - mem.copy(*Inst, true_instructions, true_block.instructions.items); - mem.copy(*Inst, false_instructions, false_block.instructions.items); - - return self.addNewInstArgs(parent_block, inst.base.src, Type.initTag(.void), Inst.CondBr, Inst.Args(Inst.CondBr){ - .condition = cond, - .true_body = .{ .instructions = true_instructions }, - .false_body = .{ .instructions = false_instructions }, - }); - } - - fn wantSafety(self: *Analyze, block: ?*Block) bool { - return switch (self.optimize_mode) { - .Debug => true, - .ReleaseSafe => true, - .ReleaseFast => false, - .ReleaseSmall => false, - }; - } - - fn analyzeInstUnreachable(self: *Analyze, block: ?*Block, unreach: *text.Inst.Unreachable) InnerError!*Inst { - const b = try self.requireRuntimeBlock(block, unreach.base.src); - if (self.wantSafety(block)) { - // TODO Once we have a panic function to call, call it here instead of this. 
- _ = try self.addNewInstArgs(b, unreach.base.src, Type.initTag(.void), Inst.Breakpoint, {}); - } - return self.addNewInstArgs(b, unreach.base.src, Type.initTag(.noreturn), Inst.Unreach, {}); - } - - fn analyzeInstRet(self: *Analyze, block: ?*Block, inst: *text.Inst.Return) InnerError!*Inst { - const b = try self.requireRuntimeBlock(block, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.noreturn), Inst.Ret, {}); - } - - fn analyzeBody(self: *Analyze, block: ?*Block, body: text.Module.Body) !void { - for (body.instructions) |src_inst| { - const new_inst = self.analyzeInst(block, src_inst) catch |err| { - if (block) |b| { - self.fns.items[b.func.fn_index].analysis_status = .failure; - try b.func.inst_table.putNoClobber(src_inst, .{ .ptr = null }); - } - return err; - }; - if (block) |b| try b.func.inst_table.putNoClobber(src_inst, .{ .ptr = new_inst }); - } - } - - fn analyzeIsNull( - self: *Analyze, - block: ?*Block, - src: usize, - operand: *Inst, - invert_logic: bool, - ) InnerError!*Inst { - return self.fail(src, "TODO implement analysis of isnull and isnotnull", .{}); - } - - /// Asserts that lhs and rhs types are both numeric. - fn cmpNumeric( - self: *Analyze, - block: ?*Block, - src: usize, - lhs: *Inst, - rhs: *Inst, - op: std.math.CompareOperator, - ) !*Inst { - assert(lhs.ty.isNumeric()); - assert(rhs.ty.isNumeric()); - - const lhs_ty_tag = lhs.ty.zigTypeTag(); - const rhs_ty_tag = rhs.ty.zigTypeTag(); - - if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) { - if (lhs.ty.arrayLen() != rhs.ty.arrayLen()) { - return self.fail(src, "vector length mismatch: {} and {}", .{ - lhs.ty.arrayLen(), - rhs.ty.arrayLen(), - }); - } - return self.fail(src, "TODO implement support for vectors in cmpNumeric", .{}); - } else if (lhs_ty_tag == .Vector or rhs_ty_tag == .Vector) { - return self.fail(src, "mixed scalar and vector operands to comparison operator: '{}' and '{}'", .{ - lhs.ty, - rhs.ty, - }); - } - - if (lhs.value()) |lhs_val| { - if (rhs.value()) |rhs_val| { - return self.constBool(src, Value.compare(lhs_val, op, rhs_val)); - } - } - - // TODO handle comparisons against lazy zero values - // Some values can be compared against zero without being runtime known or without forcing - // a full resolution of their value, for example `@sizeOf(@Frame(function))` is known to - // always be nonzero, and we benefit from not forcing the full evaluation and stack frame layout - // of this function if we don't need to. - - // It must be a runtime comparison. - const b = try self.requireRuntimeBlock(block, src); - // For floats, emit a float comparison instruction. - const lhs_is_float = switch (lhs_ty_tag) { - .Float, .ComptimeFloat => true, - else => false, - }; - const rhs_is_float = switch (rhs_ty_tag) { - .Float, .ComptimeFloat => true, - else => false, - }; - if (lhs_is_float and rhs_is_float) { - // Implicit cast the smaller one to the larger one. 
- const dest_type = x: { - if (lhs_ty_tag == .ComptimeFloat) { - break :x rhs.ty; - } else if (rhs_ty_tag == .ComptimeFloat) { - break :x lhs.ty; - } - if (lhs.ty.floatBits(self.target) >= rhs.ty.floatBits(self.target)) { - break :x lhs.ty; - } else { - break :x rhs.ty; - } - }; - const casted_lhs = try self.coerce(block, dest_type, lhs); - const casted_rhs = try self.coerce(block, dest_type, rhs); - return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){ - .lhs = casted_lhs, - .rhs = casted_rhs, - .op = op, - }); - } - // For mixed unsigned integer sizes, implicit cast both operands to the larger integer. - // For mixed signed and unsigned integers, implicit cast both operands to a signed - // integer with + 1 bit. - // For mixed floats and integers, extract the integer part from the float, cast that to - // a signed integer with mantissa bits + 1, and if there was any non-integral part of the float, - // add/subtract 1. - const lhs_is_signed = if (lhs.value()) |lhs_val| - lhs_val.compareWithZero(.lt) - else - (lhs.ty.isFloat() or lhs.ty.isSignedInt()); - const rhs_is_signed = if (rhs.value()) |rhs_val| - rhs_val.compareWithZero(.lt) - else - (rhs.ty.isFloat() or rhs.ty.isSignedInt()); - const dest_int_is_signed = lhs_is_signed or rhs_is_signed; - - var dest_float_type: ?Type = null; - - var lhs_bits: usize = undefined; - if (lhs.value()) |lhs_val| { - if (lhs_val.isUndef()) - return self.constUndef(src, Type.initTag(.bool)); - const is_unsigned = if (lhs_is_float) x: { - var bigint_space: Value.BigIntSpace = undefined; - var bigint = try lhs_val.toBigInt(&bigint_space).toManaged(self.allocator); - defer bigint.deinit(); - const zcmp = lhs_val.orderAgainstZero(); - if (lhs_val.floatHasFraction()) { - switch (op) { - .eq => return self.constBool(src, false), - .neq => return self.constBool(src, true), - else => {}, - } - if (zcmp == .lt) { - try bigint.addScalar(bigint.toConst(), -1); - } else { - try bigint.addScalar(bigint.toConst(), 1); - } - } - lhs_bits = bigint.toConst().bitCountTwosComp(); - break :x (zcmp != .lt); - } else x: { - lhs_bits = lhs_val.intBitCountTwosComp(); - break :x (lhs_val.orderAgainstZero() != .lt); - }; - lhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); - } else if (lhs_is_float) { - dest_float_type = lhs.ty; - } else { - const int_info = lhs.ty.intInfo(self.target); - lhs_bits = int_info.bits + @boolToInt(!int_info.signed and dest_int_is_signed); - } - - var rhs_bits: usize = undefined; - if (rhs.value()) |rhs_val| { - if (rhs_val.isUndef()) - return self.constUndef(src, Type.initTag(.bool)); - const is_unsigned = if (rhs_is_float) x: { - var bigint_space: Value.BigIntSpace = undefined; - var bigint = try rhs_val.toBigInt(&bigint_space).toManaged(self.allocator); - defer bigint.deinit(); - const zcmp = rhs_val.orderAgainstZero(); - if (rhs_val.floatHasFraction()) { - switch (op) { - .eq => return self.constBool(src, false), - .neq => return self.constBool(src, true), - else => {}, - } - if (zcmp == .lt) { - try bigint.addScalar(bigint.toConst(), -1); - } else { - try bigint.addScalar(bigint.toConst(), 1); - } - } - rhs_bits = bigint.toConst().bitCountTwosComp(); - break :x (zcmp != .lt); - } else x: { - rhs_bits = rhs_val.intBitCountTwosComp(); - break :x (rhs_val.orderAgainstZero() != .lt); - }; - rhs_bits += @boolToInt(is_unsigned and dest_int_is_signed); - } else if (rhs_is_float) { - dest_float_type = rhs.ty; - } else { - const int_info = rhs.ty.intInfo(self.target); - rhs_bits = int_info.bits + @boolToInt(!int_info.signed 
and dest_int_is_signed);
-        }
-
-        const dest_type = if (dest_float_type) |ft| ft else blk: {
-            const max_bits = std.math.max(lhs_bits, rhs_bits);
-            const casted_bits = std.math.cast(u16, max_bits) catch |err| switch (err) {
-                error.Overflow => return self.fail(src, "{} exceeds maximum integer bit count", .{max_bits}),
-            };
-            break :blk try self.makeIntType(dest_int_is_signed, casted_bits);
-        };
-        const casted_lhs = try self.coerce(block, dest_type, lhs);
-        const casted_rhs = try self.coerce(block, dest_type, rhs);
-
-        return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, Inst.Args(Inst.Cmp){
-            .lhs = casted_lhs,
-            .rhs = casted_rhs,
-            .op = op,
-        });
-    }
-
-    fn makeIntType(self: *Analyze, signed: bool, bits: u16) !Type {
-        if (signed) {
-            const int_payload = try self.arena.allocator.create(Type.Payload.IntSigned);
-            int_payload.* = .{ .bits = bits };
-            return Type.initPayload(&int_payload.base);
-        } else {
-            const int_payload = try self.arena.allocator.create(Type.Payload.IntUnsigned);
-            int_payload.* = .{ .bits = bits };
-            return Type.initPayload(&int_payload.base);
-        }
-    }
-
-    fn coerce(self: *Analyze, block: ?*Block, dest_type: Type, inst: *Inst) !*Inst {
-        // If the types are the same, we can return the operand.
-        if (dest_type.eql(inst.ty))
-            return inst;
-
-        const in_memory_result = coerceInMemoryAllowed(dest_type, inst.ty);
-        if (in_memory_result == .ok) {
-            return self.bitcast(block, dest_type, inst);
-        }
-
-        // *[N]T to []T
-        if (inst.ty.isSinglePointer() and dest_type.isSlice() and
-            (!inst.ty.pointerIsConst() or dest_type.pointerIsConst()))
-        {
-            const array_type = inst.ty.elemType();
-            const dst_elem_type = dest_type.elemType();
-            if (array_type.zigTypeTag() == .Array and
-                coerceInMemoryAllowed(dst_elem_type, array_type.elemType()) == .ok)
-            {
-                return self.coerceArrayPtrToSlice(dest_type, inst);
-            }
-        }
-
-        // comptime_int to fixed-width integer
-        if (inst.ty.zigTypeTag() == .ComptimeInt and dest_type.zigTypeTag() == .Int) {
-            // The representation is already correct; we only need to make sure it fits in the destination type.
-            const val = inst.value().?; // comptime_int always has comptime known value
-            if (!val.intFitsInType(dest_type, self.target)) {
-                return self.fail(inst.src, "type {} cannot represent integer value {}", .{ inst.ty, val });
-            }
-            return self.constInst(inst.src, .{ .ty = dest_type, .val = val });
-        }
-
-        // integer widening
-        if (inst.ty.zigTypeTag() == .Int and dest_type.zigTypeTag() == .Int) {
-            const src_info = inst.ty.intInfo(self.target);
-            const dst_info = dest_type.intInfo(self.target);
-            if (src_info.signed == dst_info.signed and dst_info.bits >= src_info.bits) {
-                if (inst.value()) |val| {
-                    return self.constInst(inst.src, .{ .ty = dest_type, .val = val });
-                } else {
-                    return self.fail(inst.src, "TODO implement runtime integer widening", .{});
-                }
-            } else {
-                return self.fail(inst.src, "TODO implement more int widening {} to {}", .{ inst.ty, dest_type });
-            }
-        }
-
-        return self.fail(inst.src, "TODO implement type coercion from {} to {}", .{ inst.ty, dest_type });
-    }
-
-    fn bitcast(self: *Analyze, block: ?*Block, dest_type: Type, inst: *Inst) !*Inst {
-        if (inst.value()) |val| {
-            // Keep the comptime Value representation; take the new type.
- return self.constInst(inst.src, .{ .ty = dest_type, .val = val }); - } - // TODO validate the type size and other compile errors - const b = try self.requireRuntimeBlock(block, inst.src); - return self.addNewInstArgs(b, inst.src, dest_type, Inst.BitCast, Inst.Args(Inst.BitCast){ .operand = inst }); - } - - fn coerceArrayPtrToSlice(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst { - if (inst.value()) |val| { - // The comptime Value representation is compatible with both types. - return self.constInst(inst.src, .{ .ty = dest_type, .val = val }); - } - return self.fail(inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{}); - } - - fn fail(self: *Analyze, src: usize, comptime format: []const u8, args: var) InnerError { - @setCold(true); - const msg = try std.fmt.allocPrint(&self.arena.allocator, format, args); - (try self.errors.addOne()).* = .{ - .byte_offset = src, - .msg = msg, - }; - return error.AnalysisFail; - } - - const InMemoryCoercionResult = enum { - ok, - no_match, - }; - - fn coerceInMemoryAllowed(dest_type: Type, src_type: Type) InMemoryCoercionResult { - if (dest_type.eql(src_type)) - return .ok; - - // TODO: implement more of this function - - return .no_match; - } -}; - -pub fn main() anyerror!void { - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - const allocator = if (std.builtin.link_libc) std.heap.c_allocator else &arena.allocator; - - const args = try std.process.argsAlloc(allocator); - defer std.process.argsFree(allocator, args); - - const src_path = args[1]; - const debug_error_trace = true; - - const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0); - defer allocator.free(source); - - var zir_module = try text.parse(allocator, source); - defer zir_module.deinit(allocator); - - if (zir_module.errors.len != 0) { - for (zir_module.errors) |err_msg| { - const loc = std.zig.findLineColumn(source, err_msg.byte_offset); - std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); - } - if (debug_error_trace) return error.ParseFailure; - std.process.exit(1); - } - - const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); - - var analyzed_module = try analyze(allocator, zir_module, .{ - .target = native_info.target, - .output_mode = .Obj, - .link_mode = .Static, - .optimize_mode = .Debug, - }); - defer analyzed_module.deinit(allocator); - - if (analyzed_module.errors.len != 0) { - for (analyzed_module.errors) |err_msg| { - const loc = std.zig.findLineColumn(source, err_msg.byte_offset); - std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); - } - if (debug_error_trace) return error.AnalysisFail; - std.process.exit(1); - } - - const output_zir = true; - if (output_zir) { - var new_zir_module = try text.emit_zir(allocator, analyzed_module); - defer new_zir_module.deinit(allocator); - - var bos = std.io.bufferedOutStream(std.io.getStdOut().outStream()); - try new_zir_module.writeToStream(allocator, bos.outStream()); - try bos.flush(); - } - - const link = @import("link.zig"); - var result = try link.updateFilePath(allocator, analyzed_module, std.fs.cwd(), "zir.o"); - defer result.deinit(allocator); - if (result.errors.len != 0) { - for (result.errors) |err_msg| { - const loc = std.zig.findLineColumn(source, err_msg.byte_offset); - std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); - } - if 
(debug_error_trace) return error.LinkFailure;
-        std.process.exit(1);
-    }
-}
-
-// Performance optimization ideas:
-// * when analyzing use a field in the Inst instead of HashMap to track corresponding instructions
diff --git a/src-self-hosted/libc_installation.zig b/src-self-hosted/libc_installation.zig
index 65e5776422..dfc0f1235a 100644
--- a/src-self-hosted/libc_installation.zig
+++ b/src-self-hosted/libc_installation.zig
@@ -1,6 +1,5 @@
 const std = @import("std");
 const builtin = @import("builtin");
-const util = @import("util.zig");
 const Target = std.Target;
 const fs = std.fs;
 const Allocator = std.mem.Allocator;
diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig
index 504c374ca7..a0c7ec8493 100644
--- a/src-self-hosted/link.zig
+++ b/src-self-hosted/link.zig
@@ -3,56 +3,74 @@
 const mem = std.mem;
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const ir = @import("ir.zig");
+const Module = @import("Module.zig");
 const fs = std.fs;
 const elf = std.elf;
 const codegen = @import("codegen.zig");
 
 const default_entry_addr = 0x8000000;
 
-pub const ErrorMsg = struct {
-    byte_offset: usize,
-    msg: []const u8,
-};
-
-pub const Result = struct {
-    errors: []ErrorMsg,
-
-    pub fn deinit(self: *Result, allocator: *mem.Allocator) void {
-        for (self.errors) |err| {
-            allocator.free(err.msg);
-        }
-        allocator.free(self.errors);
-        self.* = undefined;
-    }
+pub const Options = struct {
+    target: std.Target,
+    output_mode: std.builtin.OutputMode,
+    link_mode: std.builtin.LinkMode,
+    object_format: std.builtin.ObjectFormat,
+    /// Used for calculating how much space to reserve for symbols in case the binary file
+    /// does not already have a symbol table.
+    symbol_count_hint: u64 = 32,
+    /// Used for calculating how much space to reserve for executable program code in case
+    /// the binary file does not already have such a section.
+    program_code_size_hint: u64 = 256 * 1024,
 };
 
 /// Attempts incremental linking, if the file already exists.
 /// If incremental linking fails, falls back to truncating the file and rewriting it.
 /// A malicious file is detected as incremental link failure and does not cause Illegal Behavior.
 /// This operation is not atomic.
-pub fn updateFilePath(
+pub fn openBinFilePath(
     allocator: *Allocator,
-    module: ir.Module,
     dir: fs.Dir,
     sub_path: []const u8,
-) !Result {
-    const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = determineMode(module) });
-    defer file.close();
+    options: Options,
+) !ElfFile {
+    const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = determineMode(options) });
+    errdefer file.close();
 
-    return updateFile(allocator, module, file);
+    var bin_file = try openBinFile(allocator, file, options);
+    bin_file.owns_file_handle = true;
+    return bin_file;
 }
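For context, a hypothetical call site for the new `openBinFilePath` API (the driver that would own such a call is outside this diff, so the target detection and option values shown are illustrative):

```
const std = @import("std");
const link = @import("link.zig");

fn example(allocator: *std.mem.Allocator) !void {
    const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{});

    // Open (or create) "hello.o" for incremental linking; the hints only
    // size the initial tables and may be tuned freely.
    var elf_file = try link.openBinFilePath(allocator, std.fs.cwd(), "hello.o", .{
        .target = native_info.target,
        .output_mode = .Obj,
        .link_mode = .Static,
        .object_format = .elf,
        .symbol_count_hint = 16,
    });
    defer elf_file.deinit();
}
```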
 pub fn writeFilePath(
     allocator: *Allocator,
-    module: ir.Module,
     dir: fs.Dir,
     sub_path: []const u8,
-) !Result {
-    const af = try dir.atomicFile(sub_path, .{ .mode = determineMode(module) });
+    module: Module,
+    errors: *std.ArrayList(Module.ErrorMsg),
+) !void {
+    const options: Options = .{
+        .target = module.target,
+        .output_mode = module.output_mode,
+        .link_mode = module.link_mode,
+        .object_format = module.object_format,
+        .symbol_count_hint = module.decls.items.len,
+    };
+    const af = try dir.atomicFile(sub_path, .{ .mode = determineMode(options) });
     defer af.deinit();
 
-    const result = try writeFile(allocator, module, af.file);
+    const elf_file = try createElfFile(allocator, af.file, options);
+    for (module.decls.items) |decl| {
+        try elf_file.updateDecl(module, decl, errors);
+    }
+    try elf_file.flush();
+    if (elf_file.error_flags.no_entry_point_found) {
+        try errors.ensureCapacity(errors.items.len + 1);
+        errors.appendAssumeCapacity(.{
+            .byte_offset = 0,
+            .msg = try std.fmt.allocPrint(errors.allocator, "no entry point found", .{}),
+        });
+    }
     try af.finish();
-    return result;
 }
@@ -62,58 +80,126 @@ pub fn writeFilePath(
 /// Returns an error if `file` is not already open with +read +write +seek abilities.
 /// A malicious file is detected as incremental link failure and does not cause Illegal Behavior.
 /// This operation is not atomic.
-pub fn updateFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result {
-    return updateFileInner(allocator, module, file) catch |err| switch (err) {
+pub fn openBinFile(allocator: *Allocator, file: fs.File, options: Options) !ElfFile {
+    return openBinFileInner(allocator, file, options) catch |err| switch (err) {
         error.IncrFailed => {
-            return writeFile(allocator, module, file);
+            return createElfFile(allocator, file, options);
         },
         else => |e| return e,
     };
 }
 
-const Update = struct {
-    file: fs.File,
-    module: *const ir.Module,
+pub const ElfFile = struct {
+    allocator: *Allocator,
+    file: ?fs.File,
+    owns_file_handle: bool,
+    options: Options,
+    ptr_width: enum { p32, p64 },
 
     /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
     /// Same order as in the file.
-    sections: std.ArrayList(elf.Elf64_Shdr),
-    shdr_table_offset: ?u64,
+    sections: std.ArrayListUnmanaged(elf.Elf64_Shdr) = std.ArrayListUnmanaged(elf.Elf64_Shdr){},
+    shdr_table_offset: ?u64 = null,
 
     /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
     /// Same order as in the file.
-    program_headers: std.ArrayList(elf.Elf64_Phdr),
-    phdr_table_offset: ?u64,
+    program_headers: std.ArrayListUnmanaged(elf.Elf64_Phdr) = std.ArrayListUnmanaged(elf.Elf64_Phdr){},
+    phdr_table_offset: ?u64 = null,
     /// The index into the program headers of a PT_LOAD program header with Read and Execute flags
-    phdr_load_re_index: ?u16,
-    entry_addr: ?u64,
+    phdr_load_re_index: ?u16 = null,
+    /// The index into the program headers of the global offset table.
+    /// It needs PT_LOAD and Read flags.
+    phdr_got_index: ?u16 = null,
+    entry_addr: ?u64 = null,
 
-    shstrtab: std.ArrayList(u8),
-    shstrtab_index: ?u16,
+    shstrtab: std.ArrayListUnmanaged(u8) = std.ArrayListUnmanaged(u8){},
+    shstrtab_index: ?u16 = null,
 
-    text_section_index: ?u16,
-    symtab_section_index: ?u16,
+    text_section_index: ?u16 = null,
+    symtab_section_index: ?u16 = null,
+    got_section_index: ?u16 = null,
 
-    /// The same order as in the file
-    symbols: std.ArrayList(elf.Elf64_Sym),
+    /// The same order as in the file.
+    /// ELF requires global symbols to all be after the local symbols; they
+    /// cannot be mixed. So we must buffer all the global symbols and write
+    /// them at the end. These are only the local symbols. The length of this
+    /// array is the value used for sh_info in the .symtab section.
+    local_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = std.ArrayListUnmanaged(elf.Elf64_Sym){},
+    global_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = std.ArrayListUnmanaged(elf.Elf64_Sym){},
 
-    errors: std.ArrayList(ErrorMsg),
+    /// Same order as in the file. The value is the absolute vaddr value.
+    /// If the vaddr of the executable program header changes, the entire
+    /// offset table needs to be rewritten.
+    offset_table: std.ArrayListUnmanaged(u64) = std.ArrayListUnmanaged(u64){},
 
-    fn deinit(self: *Update) void {
-        self.sections.deinit();
-        self.program_headers.deinit();
-        self.shstrtab.deinit();
-        self.symbols.deinit();
-        self.errors.deinit();
+    phdr_table_dirty: bool = false,
+    shdr_table_dirty: bool = false,
+    shstrtab_dirty: bool = false,
+    offset_table_count_dirty: bool = false,
+
+    error_flags: ErrorFlags = ErrorFlags{},
+
+    pub const ErrorFlags = struct {
+        no_entry_point_found: bool = false,
+    };
+
+    pub const Decl = struct {
+        /// Each decl always gets a local symbol with the fully qualified name.
+        /// The vaddr and size are found here directly.
+        /// The file offset is found by computing the vaddr offset from the section vaddr
+        /// the symbol references, and adding that to the file offset of the section.
+        /// If this field is 0, it means the codegen size = 0 and there is no symbol or
+        /// offset table entry.
+        local_sym_index: u32,
+        /// This field is undefined for symbols with size = 0.
+        offset_table_index: u32,
+
+        pub const empty = Decl{
+            .local_sym_index = 0,
+            .offset_table_index = undefined,
+        };
+    };
+
+    pub const Export = struct {
+        sym_index: ?u32 = null,
+    };
+
+    pub fn deinit(self: *ElfFile) void {
+        self.sections.deinit(self.allocator);
+        self.program_headers.deinit(self.allocator);
+        self.shstrtab.deinit(self.allocator);
+        self.local_symbols.deinit(self.allocator);
+        self.global_symbols.deinit(self.allocator);
+        self.offset_table.deinit(self.allocator);
+        if (self.owns_file_handle) {
+            if (self.file) |f| f.close();
+        }
     }
 
-    // `expand_num / expand_den` is the factor of padding when allocation
+    pub fn makeExecutable(self: *ElfFile) !void {
+        assert(self.owns_file_handle);
+        if (self.file) |f| {
+            f.close();
+            self.file = null;
+        }
+    }
+
+    pub fn makeWritable(self: *ElfFile, dir: fs.Dir, sub_path: []const u8) !void {
+        assert(self.owns_file_handle);
+        if (self.file != null) return;
+        self.file = try dir.createFile(sub_path, .{
+            .truncate = false,
+            .read = true,
+            .mode = determineMode(self.options),
+        });
+    }
+
+    // `alloc_num / alloc_den` is the factor of padding when allocating.
     const alloc_num = 4;
     const alloc_den = 3;
 
     /// Returns end pos of collision, if any.
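+    /// For example (illustrative numbers): a request for bytes 0..32 collides
+    /// with the ELF header, so the returned end pos pushes the caller past
+    /// `@sizeOf(elf.Elf64_Ehdr)`; `findFreeSpace` simply repeats this probe,
+    /// aligning past each collision, until nothing overlaps the candidate range.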
- fn detectAllocCollision(self: *Update, start: u64, size: u64) ?u64 { - const small_ptr = self.module.target.cpu.arch.ptrBitWidth() == 32; + fn detectAllocCollision(self: *ElfFile, start: u64, size: u64) ?u64 { + const small_ptr = self.options.target.cpu.arch.ptrBitWidth() == 32; const ehdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Ehdr) else @sizeOf(elf.Elf64_Ehdr); if (start < ehdr_size) return ehdr_size; @@ -157,7 +243,7 @@ const Update = struct { return null; } - fn allocatedSize(self: *Update, start: u64) u64 { + fn allocatedSize(self: *ElfFile, start: u64) u64 { var min_pos: u64 = std.math.maxInt(u64); if (self.shdr_table_offset) |off| { if (off > start and off < min_pos) min_pos = off; @@ -176,7 +262,7 @@ const Update = struct { return min_pos - start; } - fn findFreeSpace(self: *Update, object_size: u64, min_alignment: u16) u64 { + fn findFreeSpace(self: *ElfFile, object_size: u64, min_alignment: u16) u64 { var start: u64 = 0; while (self.detectAllocCollision(start, object_size)) |item_end| { start = mem.alignForwardGeneric(u64, item_end, min_alignment); @@ -184,73 +270,86 @@ const Update = struct { return start; } - fn makeString(self: *Update, bytes: []const u8) !u32 { + fn makeString(self: *ElfFile, bytes: []const u8) !u32 { + try self.shstrtab.ensureCapacity(self.allocator, self.shstrtab.items.len + bytes.len + 1); const result = self.shstrtab.items.len; - try self.shstrtab.appendSlice(bytes); - try self.shstrtab.append(0); + self.shstrtab.appendSliceAssumeCapacity(bytes); + self.shstrtab.appendAssumeCapacity(0); return @intCast(u32, result); } - fn perform(self: *Update) !void { - const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { - 32 => .p32, - 64 => .p64, - else => return error.UnsupportedArchitecture, - }; - const small_ptr = switch (ptr_width) { + fn getString(self: *ElfFile, str_off: u32) []const u8 { + assert(str_off < self.shstrtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.shstrtab.items.ptr + str_off)); + } + + fn updateString(self: *ElfFile, old_str_off: u32, new_name: []const u8) !u32 { + const existing_name = self.getString(old_str_off); + if (mem.eql(u8, existing_name, new_name)) { + return old_str_off; + } + return self.makeString(new_name); + } + + pub fn populateMissingMetadata(self: *ElfFile) !void { + const small_ptr = switch (self.ptr_width) { .p32 => true, .p64 => false, }; - // This means the entire read-only executable program code needs to be rewritten. 
- var phdr_load_re_dirty = false; - var phdr_table_dirty = false; - var shdr_table_dirty = false; - var shstrtab_dirty = false; - var symtab_dirty = false; - + const ptr_size: u8 = switch (self.ptr_width) { + .p32 => 4, + .p64 => 8, + }; if (self.phdr_load_re_index == null) { self.phdr_load_re_index = @intCast(u16, self.program_headers.items.len); - const file_size = 256 * 1024; + const file_size = self.options.program_code_size_hint; const p_align = 0x1000; const off = self.findFreeSpace(file_size, p_align); //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); - try self.program_headers.append(.{ + try self.program_headers.append(self.allocator, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, .p_vaddr = default_entry_addr, .p_paddr = default_entry_addr, - .p_memsz = 0, + .p_memsz = file_size, .p_align = p_align, .p_flags = elf.PF_X | elf.PF_R, }); self.entry_addr = null; - phdr_load_re_dirty = true; - phdr_table_dirty = true; + self.phdr_table_dirty = true; } - if (self.sections.items.len == 0) { - // There must always be a null section in index 0 - try self.sections.append(.{ - .sh_name = 0, - .sh_type = elf.SHT_NULL, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 0, - .sh_entsize = 0, + if (self.phdr_got_index == null) { + self.phdr_got_index = @intCast(u16, self.program_headers.items.len); + const file_size = @as(u64, ptr_size) * self.options.symbol_count_hint; + // We really only need ptr alignment but since we are using PROGBITS, linux requires + // page align. + const p_align = 0x1000; + const off = self.findFreeSpace(file_size, p_align); + //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); + // TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at. + // we'll need to re-use that function anyway, in case the GOT grows and overlaps something + // else in virtual memory. 
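+            // Rough sizing example (using the defaults in Options): with 8-byte
+            // pointers and symbol_count_hint = 32, file_size here is 256 bytes,
+            // placed at a page-aligned file offset found by findFreeSpace.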
+ const default_got_addr = 0x4000000; + try self.program_headers.append(self.allocator, .{ + .p_type = elf.PT_LOAD, + .p_offset = off, + .p_filesz = file_size, + .p_vaddr = default_got_addr, + .p_paddr = default_got_addr, + .p_memsz = file_size, + .p_align = p_align, + .p_flags = elf.PF_R, }); - shdr_table_dirty = true; + self.phdr_table_dirty = true; } if (self.shstrtab_index == null) { self.shstrtab_index = @intCast(u16, self.sections.items.len); assert(self.shstrtab.items.len == 0); - try self.shstrtab.append(0); // need a 0 at position 0 + try self.shstrtab.append(self.allocator, 0); // need a 0 at position 0 const off = self.findFreeSpace(self.shstrtab.items.len, 1); //std.debug.warn("found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len }); - try self.sections.append(.{ + try self.sections.append(self.allocator, .{ .sh_name = try self.makeString(".shstrtab"), .sh_type = elf.SHT_STRTAB, .sh_flags = 0, @@ -262,14 +361,14 @@ const Update = struct { .sh_addralign = 1, .sh_entsize = 0, }); - shstrtab_dirty = true; - shdr_table_dirty = true; + self.shstrtab_dirty = true; + self.shdr_table_dirty = true; } if (self.text_section_index == null) { self.text_section_index = @intCast(u16, self.sections.items.len); const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; - try self.sections.append(.{ + try self.sections.append(self.allocator, .{ .sh_name = try self.makeString(".text"), .sh_type = elf.SHT_PROGBITS, .sh_flags = elf.SHF_ALLOC | elf.SHF_EXECINSTR, @@ -281,17 +380,35 @@ const Update = struct { .sh_addralign = phdr.p_align, .sh_entsize = 0, }); - shdr_table_dirty = true; + self.shdr_table_dirty = true; + } + if (self.got_section_index == null) { + self.got_section_index = @intCast(u16, self.sections.items.len); + const phdr = &self.program_headers.items[self.phdr_got_index.?]; + + try self.sections.append(self.allocator, .{ + .sh_name = try self.makeString(".got"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_ALLOC, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = phdr.p_align, + .sh_entsize = 0, + }); + self.shdr_table_dirty = true; } if (self.symtab_section_index == null) { self.symtab_section_index = @intCast(u16, self.sections.items.len); const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); - const file_size = self.module.exports.len * each_size; + const file_size = self.options.symbol_count_hint * each_size; const off = self.findFreeSpace(file_size, min_align); //std.debug.warn("found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); - try self.sections.append(.{ + try self.sections.append(self.allocator, .{ .sh_name = try self.makeString(".symtab"), .sh_type = elf.SHT_SYMTAB, .sh_flags = 0, @@ -300,42 +417,56 @@ const Update = struct { .sh_size = file_size, // The section header index of the associated string table. 
.sh_link = self.shstrtab_index.?, - .sh_info = @intCast(u32, self.module.exports.len), + .sh_info = @intCast(u32, self.local_symbols.items.len), .sh_addralign = min_align, .sh_entsize = each_size, }); - symtab_dirty = true; - shdr_table_dirty = true; + self.shdr_table_dirty = true; + try self.writeSymbol(0); } - const shsize: u64 = switch (ptr_width) { + const shsize: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Shdr), .p64 => @sizeOf(elf.Elf64_Shdr), }; - const shalign: u16 = switch (ptr_width) { + const shalign: u16 = switch (self.ptr_width) { .p32 => @alignOf(elf.Elf32_Shdr), .p64 => @alignOf(elf.Elf64_Shdr), }; if (self.shdr_table_offset == null) { self.shdr_table_offset = self.findFreeSpace(self.sections.items.len * shsize, shalign); - shdr_table_dirty = true; + self.shdr_table_dirty = true; } - const phsize: u64 = switch (ptr_width) { + const phsize: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Phdr), .p64 => @sizeOf(elf.Elf64_Phdr), }; - const phalign: u16 = switch (ptr_width) { + const phalign: u16 = switch (self.ptr_width) { .p32 => @alignOf(elf.Elf32_Phdr), .p64 => @alignOf(elf.Elf64_Phdr), }; if (self.phdr_table_offset == null) { self.phdr_table_offset = self.findFreeSpace(self.program_headers.items.len * phsize, phalign); - phdr_table_dirty = true; + self.phdr_table_dirty = true; } - const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + } - try self.writeCodeAndSymbols(phdr_table_dirty, shdr_table_dirty); + /// Commit pending changes and write headers. + pub fn flush(self: *ElfFile) !void { + const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); - if (phdr_table_dirty) { + // Unfortunately these have to be buffered and done at the end because ELF does not allow + // mixing local and global symbols within a symbol table. 
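+        // Resulting symtab layout (sketch): [null sym][locals...][globals...],
+        // where sh_info equals the number of local symbols (counting the null
+        // symbol at index 0) and is therefore the index of the first global.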
+ try self.writeAllGlobalSymbols(); + + if (self.phdr_table_dirty) { + const phsize: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Phdr), + .p64 => @sizeOf(elf.Elf64_Phdr), + }; + const phalign: u16 = switch (self.ptr_width) { + .p32 => @alignOf(elf.Elf32_Phdr), + .p64 => @alignOf(elf.Elf64_Phdr), + }; const allocated_size = self.allocatedSize(self.phdr_table_offset.?); const needed_size = self.program_headers.items.len * phsize; @@ -344,11 +475,10 @@ const Update = struct { self.phdr_table_offset = self.findFreeSpace(needed_size, phalign); } - const allocator = self.program_headers.allocator; - switch (ptr_width) { + switch (self.ptr_width) { .p32 => { - const buf = try allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf32_Phdr, self.program_headers.items.len); + defer self.allocator.free(buf); for (buf) |*phdr, i| { phdr.* = progHeaderTo32(self.program_headers.items[i]); @@ -356,11 +486,11 @@ const Update = struct { bswapAllFields(elf.Elf32_Phdr, phdr); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); }, .p64 => { - const buf = try allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf64_Phdr, self.program_headers.items.len); + defer self.allocator.free(buf); for (buf) |*phdr, i| { phdr.* = self.program_headers.items[i]; @@ -368,14 +498,15 @@ const Update = struct { bswapAllFields(elf.Elf64_Phdr, phdr); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); }, } + self.phdr_table_dirty = false; } { const shstrtab_sect = &self.sections.items[self.shstrtab_index.?]; - if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) { + if (self.shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) { const allocated_size = self.allocatedSize(shstrtab_sect.sh_offset); const needed_size = self.shstrtab.items.len; @@ -386,27 +517,35 @@ const Update = struct { shstrtab_sect.sh_size = needed_size; //std.debug.warn("shstrtab start=0x{x} end=0x{x}\n", .{ shstrtab_sect.sh_offset, shstrtab_sect.sh_offset + needed_size }); - try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset); - if (!shdr_table_dirty) { + try self.file.?.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset); + if (!self.shdr_table_dirty) { // Then it won't get written with the others and we need to do it. 
try self.writeSectHeader(self.shstrtab_index.?); } + self.shstrtab_dirty = false; } } - if (shdr_table_dirty) { + if (self.shdr_table_dirty) { + const shsize: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Shdr), + .p64 => @sizeOf(elf.Elf64_Shdr), + }; + const shalign: u16 = switch (self.ptr_width) { + .p32 => @alignOf(elf.Elf32_Shdr), + .p64 => @alignOf(elf.Elf64_Shdr), + }; const allocated_size = self.allocatedSize(self.shdr_table_offset.?); - const needed_size = self.sections.items.len * phsize; + const needed_size = self.sections.items.len * shsize; if (needed_size > allocated_size) { self.shdr_table_offset = null; // free the space - self.shdr_table_offset = self.findFreeSpace(needed_size, phalign); + self.shdr_table_offset = self.findFreeSpace(needed_size, shalign); } - const allocator = self.sections.allocator; - switch (ptr_width) { + switch (self.ptr_width) { .p32 => { - const buf = try allocator.alloc(elf.Elf32_Shdr, self.sections.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf32_Shdr, self.sections.items.len); + defer self.allocator.free(buf); for (buf) |*shdr, i| { shdr.* = sectHeaderTo32(self.sections.items[i]); @@ -414,11 +553,11 @@ const Update = struct { bswapAllFields(elf.Elf32_Shdr, shdr); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); }, .p64 => { - const buf = try allocator.alloc(elf.Elf64_Shdr, self.sections.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf64_Shdr, self.sections.items.len); + defer self.allocator.free(buf); for (buf) |*shdr, i| { shdr.* = self.sections.items[i]; @@ -427,42 +566,42 @@ const Update = struct { bswapAllFields(elf.Elf64_Shdr, shdr); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); }, } + self.shdr_table_dirty = false; } - if (self.entry_addr == null and self.module.output_mode == .Exe) { - const msg = try std.fmt.allocPrint(self.errors.allocator, "no entry point found", .{}); - errdefer self.errors.allocator.free(msg); - try self.errors.append(.{ - .byte_offset = 0, - .msg = msg, - }); + if (self.entry_addr == null and self.options.output_mode == .Exe) { + self.error_flags.no_entry_point_found = true; } else { + self.error_flags.no_entry_point_found = false; try self.writeElfHeader(); } // TODO find end pos and truncate + + // The point of flush() is to commit changes, so nothing should be dirty after this. 
+ assert(!self.phdr_table_dirty); + assert(!self.shdr_table_dirty); + assert(!self.shstrtab_dirty); + assert(!self.offset_table_count_dirty); + const syms_sect = &self.sections.items[self.symtab_section_index.?]; + assert(syms_sect.sh_info == self.local_symbols.items.len); } - fn writeElfHeader(self: *Update) !void { + fn writeElfHeader(self: *ElfFile) !void { var hdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined; var index: usize = 0; hdr_buf[0..4].* = "\x7fELF".*; index += 4; - const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { - 32 => .p32, - 64 => .p64, - else => return error.UnsupportedArchitecture, - }; - hdr_buf[index] = switch (ptr_width) { + hdr_buf[index] = switch (self.ptr_width) { .p32 => elf.ELFCLASS32, .p64 => elf.ELFCLASS64, }; index += 1; - const endian = self.module.target.cpu.arch.endian(); + const endian = self.options.target.cpu.arch.endian(); hdr_buf[index] = switch (endian) { .Little => elf.ELFDATA2LSB, .Big => elf.ELFDATA2MSB, @@ -480,10 +619,10 @@ const Update = struct { assert(index == 16); - const elf_type = switch (self.module.output_mode) { + const elf_type = switch (self.options.output_mode) { .Exe => elf.ET.EXEC, .Obj => elf.ET.REL, - .Lib => switch (self.module.link_mode) { + .Lib => switch (self.options.link_mode) { .Static => elf.ET.REL, .Dynamic => elf.ET.DYN, }, @@ -491,7 +630,7 @@ const Update = struct { mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(elf_type), endian); index += 2; - const machine = self.module.target.cpu.arch.toElfMachine(); + const machine = self.options.target.cpu.arch.toElfMachine(); mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(machine), endian); index += 2; @@ -501,7 +640,7 @@ const Update = struct { const e_entry = if (elf_type == .REL) 0 else self.entry_addr.?; - switch (ptr_width) { + switch (self.ptr_width) { .p32 => { mem.writeInt(u32, hdr_buf[index..][0..4], @intCast(u32, e_entry), endian); index += 4; @@ -533,14 +672,14 @@ const Update = struct { mem.writeInt(u32, hdr_buf[index..][0..4], e_flags, endian); index += 4; - const e_ehsize: u16 = switch (ptr_width) { + const e_ehsize: u16 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Ehdr), .p64 => @sizeOf(elf.Elf64_Ehdr), }; mem.writeInt(u16, hdr_buf[index..][0..2], e_ehsize, endian); index += 2; - const e_phentsize: u16 = switch (ptr_width) { + const e_phentsize: u16 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Phdr), .p64 => @sizeOf(elf.Elf64_Phdr), }; @@ -551,7 +690,7 @@ const Update = struct { mem.writeInt(u16, hdr_buf[index..][0..2], e_phnum, endian); index += 2; - const e_shentsize: u16 = switch (ptr_width) { + const e_shentsize: u16 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Shdr), .p64 => @sizeOf(elf.Elf64_Shdr), }; @@ -567,186 +706,463 @@ const Update = struct { assert(index == e_ehsize); - try self.file.pwriteAll(hdr_buf[0..index], 0); + try self.file.?.pwriteAll(hdr_buf[0..index], 0); } - fn writeCodeAndSymbols(self: *Update, phdr_table_dirty: bool, shdr_table_dirty: bool) !void { - // index 0 is always a null symbol - try self.symbols.resize(1); - self.symbols.items[0] = .{ + const AllocatedBlock = struct { + vaddr: u64, + file_offset: u64, + size_capacity: u64, + }; + + fn allocateTextBlock(self: *ElfFile, new_block_size: u64, alignment: u64) !AllocatedBlock { + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + const shdr = &self.sections.items[self.text_section_index.?]; + + // TODO Also detect virtual address collisions. 
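+        // Allocation scheme, as implemented below: find the symbol whose block
+        // currently ends highest in the segment, pad that block's size by
+        // alloc_num / alloc_den, align, and place the new block there. If the
+        // result no longer fits in the file space allocated to .text, the
+        // whole section is copied to a larger free range first.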
+ const text_capacity = self.allocatedSize(shdr.sh_offset); + // TODO instead of looping here, maintain a free list and a pointer to the end. + var last_start: u64 = phdr.p_vaddr; + var last_size: u64 = 0; + for (self.local_symbols.items) |sym| { + if (sym.st_value + sym.st_size > last_start + last_size) { + last_start = sym.st_value; + last_size = sym.st_size; + } + } + const end_vaddr = last_start + (last_size * alloc_num / alloc_den); + const aligned_start_vaddr = mem.alignForwardGeneric(u64, end_vaddr, alignment); + const needed_size = (aligned_start_vaddr + new_block_size) - phdr.p_vaddr; + if (needed_size > text_capacity) { + // Must move the entire text section. + const new_offset = self.findFreeSpace(needed_size, 0x1000); + const text_size = (last_start + last_size) - phdr.p_vaddr; + const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, text_size); + if (amt != text_size) return error.InputOutput; + shdr.sh_offset = new_offset; + } + // Now that we know the code size, we need to update the program header for executable code + shdr.sh_size = needed_size; + phdr.p_memsz = needed_size; + phdr.p_filesz = needed_size; + + self.phdr_table_dirty = true; // TODO look into making only the one program header dirty + self.shdr_table_dirty = true; // TODO look into making only the one section dirty + + return AllocatedBlock{ + .vaddr = aligned_start_vaddr, + .file_offset = shdr.sh_offset + (aligned_start_vaddr - phdr.p_vaddr), + .size_capacity = text_capacity - needed_size, + }; + } + + fn findAllocatedTextBlock(self: *ElfFile, sym: elf.Elf64_Sym) AllocatedBlock { + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + const shdr = &self.sections.items[self.text_section_index.?]; + + // Find the next sym after this one. + // TODO look into using a hash map to speed up perf. 
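+        // The returned size_capacity is the gap between this symbol's vaddr
+        // and the start of the next symbol above it (or the end of the
+        // allocated region), i.e. how much the decl may grow in place before
+        // it must be moved.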
+ const text_capacity = self.allocatedSize(shdr.sh_offset); + var next_vaddr_start = phdr.p_vaddr + text_capacity; + for (self.local_symbols.items) |elem| { + if (elem.st_value < sym.st_value) continue; + if (elem.st_value < next_vaddr_start) next_vaddr_start = elem.st_value; + } + return .{ + .vaddr = sym.st_value, + .file_offset = shdr.sh_offset + (sym.st_value - phdr.p_vaddr), + .size_capacity = next_vaddr_start - sym.st_value, + }; + } + + pub fn allocateDeclIndexes(self: *ElfFile, decl: *Module.Decl) !void { + if (decl.link.local_sym_index != 0) return; + + try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); + try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); + const local_sym_index = self.local_symbols.items.len; + const offset_table_index = self.offset_table.items.len; + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + + self.local_symbols.appendAssumeCapacity(.{ .st_name = 0, .st_info = 0, .st_other = 0, .st_shndx = 0, - .st_value = 0, + .st_value = phdr.p_vaddr, .st_size = 0, + }); + errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1); + self.offset_table.appendAssumeCapacity(0); + errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); + + self.offset_table_count_dirty = true; + + decl.link = .{ + .local_sym_index = @intCast(u32, local_sym_index), + .offset_table_index = @intCast(u32, offset_table_index), }; - - const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; - var vaddr: u64 = phdr.p_vaddr; - var file_off: u64 = phdr.p_offset; - - var code = std.ArrayList(u8).init(self.sections.allocator); - defer code.deinit(); - - for (self.module.exports) |exp| { - code.shrink(0); - var symbol = try codegen.generateSymbol(exp.typed_value, self.module.*, &code); - defer symbol.deinit(code.allocator); - if (symbol.errors.len != 0) { - for (symbol.errors) |err| { - const msg = try mem.dupe(self.errors.allocator, u8, err.msg); - errdefer self.errors.allocator.free(msg); - try self.errors.append(.{ - .byte_offset = err.byte_offset, - .msg = msg, - }); - } - continue; - } - try self.file.pwriteAll(code.items, file_off); - - if (mem.eql(u8, exp.name, "_start")) { - self.entry_addr = vaddr; - } - (try self.symbols.addOne()).* = .{ - .st_name = try self.makeString(exp.name), - .st_info = (elf.STB_LOCAL << 4) | elf.STT_FUNC, - .st_other = 0, - .st_shndx = self.text_section_index.?, - .st_value = vaddr, - .st_size = code.items.len, - }; - vaddr += code.items.len; - } - - { - // Now that we know the code size, we need to update the program header for executable code - phdr.p_memsz = vaddr - phdr.p_vaddr; - phdr.p_filesz = phdr.p_memsz; - - const shdr = &self.sections.items[self.text_section_index.?]; - shdr.sh_size = phdr.p_filesz; - - if (!phdr_table_dirty) { - // Then it won't get written with the others and we need to do it. - try self.writeProgHeader(self.phdr_load_re_index.?); - } - if (!shdr_table_dirty) { - // Then it won't get written with the others and we need to do it. 
- try self.writeSectHeader(self.text_section_index.?); - } - } - - return self.writeSymbols(); } - fn writeProgHeader(self: *Update, index: usize) !void { - const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + pub fn updateDecl(self: *ElfFile, module: *Module, decl: *Module.Decl) !void { + var code_buffer = std.ArrayList(u8).init(self.allocator); + defer code_buffer.deinit(); + + const typed_value = decl.typed_value.most_recent.typed_value; + const code = switch (try codegen.generateSymbol(self, decl.src, typed_value, &code_buffer)) { + .externally_managed => |x| x, + .appended => code_buffer.items, + .fail => |em| { + decl.analysis = .codegen_failure; + _ = try module.failed_decls.put(decl, em); + return; + }, + }; + + const required_alignment = typed_value.ty.abiAlignment(self.options.target); + + const file_offset = blk: { + const stt_bits: u8 = switch (typed_value.ty.zigTypeTag()) { + .Fn => elf.STT_FUNC, + else => elf.STT_OBJECT, + }; + + if (decl.link.local_sym_index != 0) { + const local_sym = &self.local_symbols.items[decl.link.local_sym_index]; + const existing_block = self.findAllocatedTextBlock(local_sym.*); + const need_realloc = local_sym.st_size == 0 or + code.len > existing_block.size_capacity or + !mem.isAlignedGeneric(u64, local_sym.st_value, required_alignment); + // TODO check for collision with another symbol + const file_offset = if (need_realloc) fo: { + const new_block = try self.allocateTextBlock(code.len, required_alignment); + local_sym.st_value = new_block.vaddr; + self.offset_table.items[decl.link.offset_table_index] = new_block.vaddr; + + //std.debug.warn("{}: writing got index {}=0x{x}\n", .{ + // decl.name, + // decl.link.offset_table_index, + // self.offset_table.items[decl.link.offset_table_index], + //}); + try self.writeOffsetTableEntry(decl.link.offset_table_index); + + break :fo new_block.file_offset; + } else existing_block.file_offset; + local_sym.st_size = code.len; + local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(decl.name)); + local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits; + local_sym.st_other = 0; + local_sym.st_shndx = self.text_section_index.?; + // TODO this write could be avoided if no fields of the symbol were changed. 
+ try self.writeSymbol(decl.link.local_sym_index); + + //std.debug.warn("updating {} at vaddr 0x{x}\n", .{ decl.name, local_sym.st_value }); + break :blk file_offset; + } else { + try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); + try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); + const decl_name = mem.spanZ(decl.name); + const name_str_index = try self.makeString(decl_name); + const new_block = try self.allocateTextBlock(code.len, required_alignment); + const local_sym_index = self.local_symbols.items.len; + const offset_table_index = self.offset_table.items.len; + + //std.debug.warn("add symbol for {} at vaddr 0x{x}, size {}\n", .{ decl.name, new_block.vaddr, code.len }); + self.local_symbols.appendAssumeCapacity(.{ + .st_name = name_str_index, + .st_info = (elf.STB_LOCAL << 4) | stt_bits, + .st_other = 0, + .st_shndx = self.text_section_index.?, + .st_value = new_block.vaddr, + .st_size = code.len, + }); + errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1); + self.offset_table.appendAssumeCapacity(new_block.vaddr); + errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); + + self.offset_table_count_dirty = true; + + try self.writeSymbol(local_sym_index); + try self.writeOffsetTableEntry(offset_table_index); + + decl.link = .{ + .local_sym_index = @intCast(u32, local_sym_index), + .offset_table_index = @intCast(u32, offset_table_index), + }; + + //std.debug.warn("writing new {} at vaddr 0x{x}\n", .{ decl.name, new_block.vaddr }); + break :blk new_block.file_offset; + } + }; + + try self.file.?.pwriteAll(code, file_offset); + + // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. + const decl_exports = module.decl_exports.getValue(decl) orelse &[0]*Module.Export{}; + return self.updateDeclExports(module, decl, decl_exports); + } + + /// Must be called only after a successful call to `updateDecl`. 
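+    /// Each export is written as a global symbol aliasing the decl's local
+    /// symbol; for example, exporting a decl as "_start" with Strong linkage
+    /// also records `entry_addr` for the ELF header.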
+ pub fn updateDeclExports( + self: *ElfFile, + module: *Module, + decl: *const Module.Decl, + exports: []const *Module.Export, + ) !void { + try self.global_symbols.ensureCapacity(self.allocator, self.global_symbols.items.len + exports.len); + const typed_value = decl.typed_value.most_recent.typed_value; + if (decl.link.local_sym_index == 0) return; + const decl_sym = self.local_symbols.items[decl.link.local_sym_index]; + + for (exports) |exp| { + if (exp.options.section) |section_name| { + if (!mem.eql(u8, section_name, ".text")) { + try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); + module.failed_exports.putAssumeCapacityNoClobber( + exp, + try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: ExportOptions.section", .{}), + ); + continue; + } + } + const stb_bits: u8 = switch (exp.options.linkage) { + .Internal => elf.STB_LOCAL, + .Strong => blk: { + if (mem.eql(u8, exp.options.name, "_start")) { + self.entry_addr = decl_sym.st_value; + } + break :blk elf.STB_GLOBAL; + }, + .Weak => elf.STB_WEAK, + .LinkOnce => { + try module.failed_exports.ensureCapacity(module.failed_exports.size + 1); + module.failed_exports.putAssumeCapacityNoClobber( + exp, + try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: GlobalLinkage.LinkOnce", .{}), + ); + continue; + }, + }; + const stt_bits: u8 = @truncate(u4, decl_sym.st_info); + if (exp.link.sym_index) |i| { + const sym = &self.global_symbols.items[i]; + sym.* = .{ + .st_name = try self.updateString(sym.st_name, exp.options.name), + .st_info = (stb_bits << 4) | stt_bits, + .st_other = 0, + .st_shndx = self.text_section_index.?, + .st_value = decl_sym.st_value, + .st_size = decl_sym.st_size, + }; + } else { + const name = try self.makeString(exp.options.name); + const i = self.global_symbols.items.len; + self.global_symbols.appendAssumeCapacity(.{ + .st_name = name, + .st_info = (stb_bits << 4) | stt_bits, + .st_other = 0, + .st_shndx = self.text_section_index.?, + .st_value = decl_sym.st_value, + .st_size = decl_sym.st_size, + }); + errdefer self.global_symbols.shrink(self.allocator, self.global_symbols.items.len - 1); + + exp.link.sym_index = @intCast(u32, i); + } + } + } + + fn writeProgHeader(self: *ElfFile, index: usize) !void { + const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const offset = self.program_headers.items[index].p_offset; - switch (self.module.target.cpu.arch.ptrBitWidth()) { + switch (self.options.target.cpu.arch.ptrBitWidth()) { 32 => { var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])}; if (foreign_endian) { bswapAllFields(elf.Elf32_Phdr, &phdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&phdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, 64 => { var phdr = [1]elf.Elf64_Phdr{self.program_headers.items[index]}; if (foreign_endian) { bswapAllFields(elf.Elf64_Phdr, &phdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&phdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, else => return error.UnsupportedArchitecture, } } - fn writeSectHeader(self: *Update, index: usize) !void { - const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + fn writeSectHeader(self: *ElfFile, index: usize) !void { + const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const offset = self.sections.items[index].sh_offset; - switch 
(self.module.target.cpu.arch.ptrBitWidth()) { + switch (self.options.target.cpu.arch.ptrBitWidth()) { 32 => { var shdr: [1]elf.Elf32_Shdr = undefined; shdr[0] = sectHeaderTo32(self.sections.items[index]); if (foreign_endian) { bswapAllFields(elf.Elf32_Shdr, &shdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&shdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); }, 64 => { var shdr = [1]elf.Elf64_Shdr{self.sections.items[index]}; if (foreign_endian) { bswapAllFields(elf.Elf64_Shdr, &shdr[0]); } - return self.file.pwriteAll(mem.sliceAsBytes(&shdr), offset); + return self.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); }, else => return error.UnsupportedArchitecture, } } - fn writeSymbols(self: *Update) !void { - const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { - 32 => .p32, - 64 => .p64, - else => return error.UnsupportedArchitecture, + fn writeOffsetTableEntry(self: *ElfFile, index: usize) !void { + const shdr = &self.sections.items[self.got_section_index.?]; + const phdr = &self.program_headers.items[self.phdr_got_index.?]; + const entry_size: u16 = switch (self.ptr_width) { + .p32 => 4, + .p64 => 8, }; - const small_ptr = ptr_width == .p32; - const syms_sect = &self.sections.items[self.symtab_section_index.?]; - const sym_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); - const sym_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); + if (self.offset_table_count_dirty) { + // TODO Also detect virtual address collisions. + const allocated_size = self.allocatedSize(shdr.sh_offset); + const needed_size = self.local_symbols.items.len * entry_size; + if (needed_size > allocated_size) { + // Must move the entire got section. 
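+                // The existing entries are copied to the new location before
+                // sh_offset is updated, so GOT values written earlier survive
+                // the move.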
+ const new_offset = self.findFreeSpace(needed_size, entry_size); + const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, shdr.sh_size); + if (amt != shdr.sh_size) return error.InputOutput; + shdr.sh_offset = new_offset; + } + shdr.sh_size = needed_size; + phdr.p_memsz = needed_size; + phdr.p_filesz = needed_size; - const allocated_size = self.allocatedSize(syms_sect.sh_offset); - const needed_size = self.symbols.items.len * sym_size; - if (needed_size > allocated_size) { - syms_sect.sh_size = 0; // free the space - syms_sect.sh_offset = self.findFreeSpace(needed_size, sym_align); - //std.debug.warn("moved symtab to 0x{x} to 0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size }); + self.shdr_table_dirty = true; // TODO look into making only the one section dirty + self.phdr_table_dirty = true; // TODO look into making only the one program header dirty + + self.offset_table_count_dirty = false; } - //std.debug.warn("symtab start=0x{x} end=0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size }); - syms_sect.sh_size = needed_size; - syms_sect.sh_info = @intCast(u32, self.symbols.items.len); - const allocator = self.symbols.allocator; - const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); - switch (ptr_width) { + const endian = self.options.target.cpu.arch.endian(); + const off = shdr.sh_offset + @as(u64, entry_size) * index; + switch (self.ptr_width) { .p32 => { - const buf = try allocator.alloc(elf.Elf32_Sym, self.symbols.items.len); - defer allocator.free(buf); + var buf: [4]u8 = undefined; + mem.writeInt(u32, &buf, @intCast(u32, self.offset_table.items[index]), endian); + try self.file.?.pwriteAll(&buf, off); + }, + .p64 => { + var buf: [8]u8 = undefined; + mem.writeInt(u64, &buf, self.offset_table.items[index], endian); + try self.file.?.pwriteAll(&buf, off); + }, + } + } + + fn writeSymbol(self: *ElfFile, index: usize) !void { + const syms_sect = &self.sections.items[self.symtab_section_index.?]; + // Make sure we are not pointlessly writing symbol data that will have to get relocated + // due to running out of space. + if (self.local_symbols.items.len != syms_sect.sh_info) { + const sym_size: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Sym), + .p64 => @sizeOf(elf.Elf64_Sym), + }; + const sym_align: u16 = switch (self.ptr_width) { + .p32 => @alignOf(elf.Elf32_Sym), + .p64 => @alignOf(elf.Elf64_Sym), + }; + const needed_size = (self.local_symbols.items.len + self.global_symbols.items.len) * sym_size; + if (needed_size > self.allocatedSize(syms_sect.sh_offset)) { + // Move all the symbols to a new file location. 
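+                // Only the locals counted by sh_info need to be copied here;
+                // global symbols are rewritten wholesale by
+                // writeAllGlobalSymbols during flush().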
+ const new_offset = self.findFreeSpace(needed_size, sym_align); + const existing_size = @as(u64, syms_sect.sh_info) * sym_size; + const amt = try self.file.?.copyRangeAll(syms_sect.sh_offset, self.file.?, new_offset, existing_size); + if (amt != existing_size) return error.InputOutput; + syms_sect.sh_offset = new_offset; + } + syms_sect.sh_info = @intCast(u32, self.local_symbols.items.len); + syms_sect.sh_size = needed_size; // anticipating adding the global symbols later + self.shdr_table_dirty = true; // TODO look into only writing one section + } + const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + switch (self.ptr_width) { + .p32 => { + var sym = [1]elf.Elf32_Sym{ + .{ + .st_name = self.local_symbols.items[index].st_name, + .st_value = @intCast(u32, self.local_symbols.items[index].st_value), + .st_size = @intCast(u32, self.local_symbols.items[index].st_size), + .st_info = self.local_symbols.items[index].st_info, + .st_other = self.local_symbols.items[index].st_other, + .st_shndx = self.local_symbols.items[index].st_shndx, + }, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf32_Sym, &sym[0]); + } + const off = syms_sect.sh_offset + @sizeOf(elf.Elf32_Sym) * index; + try self.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); + }, + .p64 => { + var sym = [1]elf.Elf64_Sym{self.local_symbols.items[index]}; + if (foreign_endian) { + bswapAllFields(elf.Elf64_Sym, &sym[0]); + } + const off = syms_sect.sh_offset + @sizeOf(elf.Elf64_Sym) * index; + try self.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); + }, + } + } + + fn writeAllGlobalSymbols(self: *ElfFile) !void { + const syms_sect = &self.sections.items[self.symtab_section_index.?]; + const sym_size: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Sym), + .p64 => @sizeOf(elf.Elf64_Sym), + }; + //std.debug.warn("symtab start=0x{x} end=0x{x}\n", .{ syms_sect.sh_offset, syms_sect.sh_offset + needed_size }); + const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const global_syms_off = syms_sect.sh_offset + self.local_symbols.items.len * sym_size; + switch (self.ptr_width) { + .p32 => { + const buf = try self.allocator.alloc(elf.Elf32_Sym, self.global_symbols.items.len); + defer self.allocator.free(buf); for (buf) |*sym, i| { sym.* = .{ - .st_name = self.symbols.items[i].st_name, - .st_value = @intCast(u32, self.symbols.items[i].st_value), - .st_size = @intCast(u32, self.symbols.items[i].st_size), - .st_info = self.symbols.items[i].st_info, - .st_other = self.symbols.items[i].st_other, - .st_shndx = self.symbols.items[i].st_shndx, + .st_name = self.global_symbols.items[i].st_name, + .st_value = @intCast(u32, self.global_symbols.items[i].st_value), + .st_size = @intCast(u32, self.global_symbols.items[i].st_size), + .st_info = self.global_symbols.items[i].st_info, + .st_other = self.global_symbols.items[i].st_other, + .st_shndx = self.global_symbols.items[i].st_shndx, }; if (foreign_endian) { bswapAllFields(elf.Elf32_Sym, sym); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); }, .p64 => { - const buf = try allocator.alloc(elf.Elf64_Sym, self.symbols.items.len); - defer allocator.free(buf); + const buf = try self.allocator.alloc(elf.Elf64_Sym, self.global_symbols.items.len); + defer self.allocator.free(buf); for (buf) |*sym, i| { sym.* = .{ - .st_name = self.symbols.items[i].st_name, - .st_value = self.symbols.items[i].st_value, 
- .st_size = self.symbols.items[i].st_size, - .st_info = self.symbols.items[i].st_info, - .st_other = self.symbols.items[i].st_other, - .st_shndx = self.symbols.items[i].st_shndx, + .st_name = self.global_symbols.items[i].st_name, + .st_value = self.global_symbols.items[i].st_value, + .st_size = self.global_symbols.items[i].st_size, + .st_info = self.global_symbols.items[i].st_info, + .st_other = self.global_symbols.items[i].st_other, + .st_shndx = self.global_symbols.items[i].st_shndx, }; if (foreign_endian) { bswapAllFields(elf.Elf64_Sym, sym); } } - try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + try self.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); }, } } @@ -754,13 +1170,13 @@ const Update = struct { /// Truncates the existing file contents and overwrites the contents. /// Returns an error if `file` is not already open with +read +write +seek abilities. -pub fn writeFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { - switch (module.output_mode) { +pub fn createElfFile(allocator: *Allocator, file: fs.File, options: Options) !ElfFile { + switch (options.output_mode) { .Exe => {}, .Obj => {}, .Lib => return error.TODOImplementWritingLibFiles, } - switch (module.object_format) { + switch (options.object_format) { .unknown => unreachable, // TODO remove this tag from the enum .coff => return error.TODOImplementWritingCOFF, .elf => {}, @@ -768,38 +1184,80 @@ pub fn writeFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Resul .wasm => return error.TODOImplementWritingWasmObjects, } - var update = Update{ + var self: ElfFile = .{ + .allocator = allocator, .file = file, - .module = &module, - .sections = std.ArrayList(elf.Elf64_Shdr).init(allocator), - .shdr_table_offset = null, - .program_headers = std.ArrayList(elf.Elf64_Phdr).init(allocator), - .phdr_table_offset = null, - .phdr_load_re_index = null, - .entry_addr = null, - .shstrtab = std.ArrayList(u8).init(allocator), - .shstrtab_index = null, - .text_section_index = null, - .symtab_section_index = null, - - .symbols = std.ArrayList(elf.Elf64_Sym).init(allocator), - - .errors = std.ArrayList(ErrorMsg).init(allocator), + .options = options, + .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { + 32 => .p32, + 64 => .p64, + else => return error.UnsupportedELFArchitecture, + }, + .shdr_table_dirty = true, + .owns_file_handle = false, }; - defer update.deinit(); + errdefer self.deinit(); - try update.perform(); - return Result{ - .errors = update.errors.toOwnedSlice(), - }; + // Index 0 is always a null symbol. + try self.local_symbols.append(allocator, .{ + .st_name = 0, + .st_info = 0, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = 0, + }); + + // There must always be a null section in index 0 + try self.sections.append(allocator, .{ + .sh_name = 0, + .sh_type = elf.SHT_NULL, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 0, + .sh_entsize = 0, + }); + + try self.populateMissingMetadata(); + + return self; } /// Returns error.IncrFailed if incremental update could not be performed. 
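+/// `openBinFile` catches error.IncrFailed and falls back to `createElfFile`,
+/// which truncates the existing contents and rewrites the file from scratch.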
-fn updateFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { - //var ehdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined; +fn openBinFileInner(allocator: *Allocator, file: fs.File, options: Options) !ElfFile { + switch (options.output_mode) { + .Exe => {}, + .Obj => {}, + .Lib => return error.IncrFailed, + } + switch (options.object_format) { + .unknown => unreachable, // TODO remove this tag from the enum + .coff => return error.IncrFailed, + .elf => {}, + .macho => return error.IncrFailed, + .wasm => return error.IncrFailed, + } + var self: ElfFile = .{ + .allocator = allocator, + .file = file, + .owns_file_handle = false, + .options = options, + .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { + 32 => .p32, + 64 => .p64, + else => return error.UnsupportedELFArchitecture, + }, + }; + errdefer self.deinit(); - // TODO implement incremental linking + // TODO implement reading the elf file return error.IncrFailed; + //try self.populateMissingMetadata(); + //return self; } /// Saturating multiplication @@ -840,14 +1298,14 @@ fn sectHeaderTo32(shdr: elf.Elf64_Shdr) elf.Elf32_Shdr { }; } -fn determineMode(module: ir.Module) fs.File.Mode { +fn determineMode(options: Options) fs.File.Mode { // On common systems with a 0o022 umask, 0o777 will still result in a file created // with 0o755 permissions, but it works appropriately if the system is configured // more leniently. As another data point, C's fopen seems to open files with the // 666 mode. const executable_mode = if (std.Target.current.os.tag == .windows) 0 else 0o777; - switch (module.output_mode) { - .Lib => return switch (module.link_mode) { + switch (options.output_mode) { + .Lib => return switch (options.link_mode) { .Dynamic => executable_mode, .Static => fs.File.default_mode, }, diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index 05eb7212dd..4ef4acc24b 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -1,29 +1,29 @@ const std = @import("std"); -const builtin = @import("builtin"); - -const event = std.event; -const os = std.os; const io = std.io; const fs = std.fs; const mem = std.mem; const process = std.process; const Allocator = mem.Allocator; const ArrayList = std.ArrayList; +const ast = std.zig.ast; +const Module = @import("Module.zig"); +const link = @import("link.zig"); +const Package = @import("Package.zig"); +const zir = @import("zir.zig"); -const c = @import("c.zig"); -const introspect = @import("introspect.zig"); -const ZigCompiler = @import("compilation.zig").ZigCompiler; -const Compilation = @import("compilation.zig").Compilation; -const Target = std.Target; -const errmsg = @import("errmsg.zig"); -const LibCInstallation = @import("libc_installation.zig").LibCInstallation; - -pub const io_mode = .evented; +// TODO Improve async I/O enough that we feel comfortable doing this. 
+//pub const io_mode = .evented; pub const max_src_size = 2 * 1024 * 1024 * 1024; // 2 GiB +pub const Color = enum { + Auto, + Off, + On, +}; + const usage = - \\usage: zig [command] [options] + \\Usage: zig [command] [options] \\ \\Commands: \\ @@ -31,7 +31,6 @@ const usage = \\ build-lib [source] Create library from source or object files \\ build-obj [source] Create object from source or assembly \\ fmt [source] Parse file and render in canonical zig format - \\ libc [paths_file] Display native libc paths file or validate one \\ targets List available compilation targets \\ version Print version number and exit \\ zen Print zen of zig and exit @@ -39,175 +38,152 @@ const usage = \\ ; -const Command = struct { - name: []const u8, - exec: fn (*Allocator, []const []const u8) callconv(.Async) anyerror!void, -}; - pub fn main() !void { - const allocator = std.heap.c_allocator; + // TODO general purpose allocator in the zig std lib + const gpa = if (std.builtin.link_libc) std.heap.c_allocator else std.heap.page_allocator; + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = &arena_instance.allocator; - const stderr = io.getStdErr().outStream(); - - const args = try process.argsAlloc(allocator); - defer process.argsFree(allocator, args); + const args = try process.argsAlloc(arena); if (args.len <= 1) { - try stderr.writeAll("expected command argument\n\n"); - try stderr.writeAll(usage); + std.debug.warn("expected command argument\n\n{}", .{usage}); process.exit(1); } const cmd = args[1]; const cmd_args = args[2..]; if (mem.eql(u8, cmd, "build-exe")) { - return buildOutputType(allocator, cmd_args, .Exe); + return buildOutputType(gpa, arena, cmd_args, .Exe); } else if (mem.eql(u8, cmd, "build-lib")) { - return buildOutputType(allocator, cmd_args, .Lib); + return buildOutputType(gpa, arena, cmd_args, .Lib); } else if (mem.eql(u8, cmd, "build-obj")) { - return buildOutputType(allocator, cmd_args, .Obj); + return buildOutputType(gpa, arena, cmd_args, .Obj); } else if (mem.eql(u8, cmd, "fmt")) { - return cmdFmt(allocator, cmd_args); - } else if (mem.eql(u8, cmd, "libc")) { - return cmdLibC(allocator, cmd_args); + return cmdFmt(gpa, cmd_args); } else if (mem.eql(u8, cmd, "targets")) { - const info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); + const info = try std.zig.system.NativeTargetInfo.detect(arena, .{}); const stdout = io.getStdOut().outStream(); - return @import("print_targets.zig").cmdTargets(allocator, cmd_args, stdout, info.target); + return @import("print_targets.zig").cmdTargets(arena, cmd_args, stdout, info.target); } else if (mem.eql(u8, cmd, "version")) { - return cmdVersion(allocator, cmd_args); + // Need to set up the build script to give the version as a comptime value. 
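+        // A hypothetical sketch of one way to do that (names here are
+        // assumptions, not part of this change): in build.zig,
+        //     exe.addBuildOption([]const u8, "version", version_string);
+        // and then here:
+        //     try io.getStdOut().writeAll(@import("build_options").version);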
+ std.debug.warn("TODO version command not implemented yet\n", .{}); + return error.Unimplemented; } else if (mem.eql(u8, cmd, "zen")) { - return cmdZen(allocator, cmd_args); + try io.getStdOut().writeAll(info_zen); } else if (mem.eql(u8, cmd, "help")) { - return cmdHelp(allocator, cmd_args); - } else if (mem.eql(u8, cmd, "internal")) { - return cmdInternal(allocator, cmd_args); + try io.getStdOut().writeAll(usage); } else { - try stderr.print("unknown command: {}\n\n", .{args[1]}); - try stderr.writeAll(usage); + std.debug.warn("unknown command: {}\n\n{}", .{ args[1], usage }); process.exit(1); } } const usage_build_generic = - \\usage: zig build-exe [file] - \\ zig build-lib [file] - \\ zig build-obj [file] + \\Usage: zig build-exe [files] + \\ zig build-lib [files] + \\ zig build-obj [files] + \\ + \\Supported file types: + \\ (planned) .zig Zig source code + \\ .zir Zig Intermediate Representation code + \\ (planned) .o ELF object file + \\ (planned) .o MACH-O (macOS) object file + \\ (planned) .obj COFF (Windows) object file + \\ (planned) .lib COFF (Windows) static library + \\ (planned) .a ELF static library + \\ (planned) .so ELF shared object (dynamic link) + \\ (planned) .dll Windows Dynamic Link Library + \\ (planned) .dylib MACH-O (macOS) dynamic library + \\ (planned) .s Target-specific assembly source code + \\ (planned) .S Assembly with C preprocessor (requires LLVM extensions) + \\ (planned) .c C source code (requires LLVM extensions) + \\ (planned) .cpp C++ source code (requires LLVM extensions) + \\ Other C++ extensions: .C .cc .cxx \\ \\General Options: - \\ --help Print this help and exit - \\ --color [auto|off|on] Enable or disable colored error messages + \\ -h, --help Print this help and exit + \\ --watch Enable compiler REPL + \\ --color [auto|off|on] Enable or disable colored error messages + \\ -femit-bin[=path] (default) output machine code + \\ -fno-emit-bin Do not output machine code \\ \\Compile Options: - \\ --libc [file] Provide a file which specifies libc paths - \\ --assembly [source] Add assembly file to build - \\ --emit [filetype] Emit a specific file format as compilation output - \\ --enable-timing-info Print timing diagnostics - \\ --name [name] Override output name - \\ --output [file] Override destination path - \\ --output-h [file] Override generated header file path - \\ --pkg-begin [name] [path] Make package available to import and push current pkg - \\ --pkg-end Pop current pkg - \\ --mode [mode] Set the build mode - \\ debug (default) optimizations off, safety on - \\ release-fast optimizations on, safety off - \\ release-safe optimizations on, safety on - \\ release-small optimize for small binary, safety off - \\ --static Output will be statically linked - \\ --strip Exclude debug symbols - \\ -target [name] -- see the targets command - \\ --eh-frame-hdr enable C++ exception handling by passing --eh-frame-hdr to linker - \\ --verbose-tokenize Turn on compiler debug output for tokenization - \\ --verbose-ast-tree Turn on compiler debug output for parsing into an AST (tree view) - \\ --verbose-ast-fmt Turn on compiler debug output for parsing into an AST (render source) - \\ --verbose-link Turn on compiler debug output for linking - \\ --verbose-ir Turn on compiler debug output for Zig IR - \\ --verbose-llvm-ir Turn on compiler debug output for LLVM IR - \\ --verbose-cimport Turn on compiler debug output for C imports - \\ -dirafter [dir] Same as -isystem but do it last - \\ -isystem [dir] Add additional search path for other .h files - \\ 
-mllvm [arg] Additional arguments to forward to LLVM's option processing + \\ -target [name] -- see the targets command + \\ -mcpu [cpu] Specify target CPU and feature set + \\ --name [name] Override output name + \\ --mode [mode] Set the build mode + \\ Debug (default) optimizations off, safety on + \\ ReleaseFast optimizations on, safety off + \\ ReleaseSafe optimizations on, safety on + \\ ReleaseSmall optimize for small binary, safety off + \\ --dynamic Force output to be dynamically linked + \\ --strip Exclude debug symbols \\ \\Link Options: - \\ --ar-path [path] Set the path to ar - \\ --each-lib-rpath Add rpath for each used dynamic library - \\ --library [lib] Link against lib - \\ --forbid-library [lib] Make it an error to link against lib - \\ --library-path [dir] Add a directory to the library search path - \\ --linker-script [path] Use a custom linker script - \\ --object [obj] Add object file to build - \\ -rdynamic Add all symbols to the dynamic symbol table - \\ -rpath [path] Add directory to the runtime library search path - \\ -framework [name] (darwin) link against framework - \\ -mios-version-min [ver] (darwin) set iOS deployment target - \\ -mmacosx-version-min [ver] (darwin) set Mac OS X deployment target - \\ --ver-major [ver] Dynamic library semver major version - \\ --ver-minor [ver] Dynamic library semver minor version - \\ --ver-patch [ver] Dynamic library semver patch version + \\ -l[lib], --library [lib] Link against system library + \\ --dynamic-linker [path] Set the dynamic interpreter path (usually ld.so) + \\ --version [ver] Dynamic library semver \\ + \\Debug Options (Zig Compiler Development): + \\ -ftime-report Print timing diagnostics + \\ --debug-tokenize verbose tokenization + \\ --debug-ast-tree verbose parsing into an AST (tree view) + \\ --debug-ast-fmt verbose parsing into an AST (render source) + \\ --debug-ir verbose Zig IR + \\ --debug-link verbose linking + \\ --debug-codegen verbose machine code generation \\ ; -fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Compilation.Kind) !void { - const stderr = io.getStdErr().outStream(); +const Emit = union(enum) { + no, + yes_default_path, + yes: []const u8, +}; - var color: errmsg.Color = .Auto; +fn buildOutputType( + gpa: *Allocator, + arena: *Allocator, + args: []const []const u8, + output_mode: std.builtin.OutputMode, +) !void { + var color: Color = .Auto; var build_mode: std.builtin.Mode = .Debug; - var emit_bin = true; - var emit_asm = false; - var emit_llvm_ir = false; - var emit_h = false; var provided_name: ?[]const u8 = null; - var is_dynamic = false; + var link_mode: ?std.builtin.LinkMode = null; var root_src_file: ?[]const u8 = null; - var libc_arg: ?[]const u8 = null; var version: std.builtin.Version = .{ .major = 0, .minor = 0, .patch = 0 }; - var linker_script: ?[]const u8 = null; var strip = false; - var verbose_tokenize = false; - var verbose_ast_tree = false; - var verbose_ast_fmt = false; - var verbose_link = false; - var verbose_ir = false; - var verbose_llvm_ir = false; - var verbose_cimport = false; - var linker_rdynamic = false; - var link_eh_frame_hdr = false; - var macosx_version_min: ?[]const u8 = null; - var ios_version_min: ?[]const u8 = null; + var watch = false; + var debug_tokenize = false; + var debug_ast_tree = false; + var debug_ast_fmt = false; + var debug_link = false; + var debug_ir = false; + var debug_codegen = false; + var time_report = false; + var emit_bin: Emit = .yes_default_path; + var emit_zir: Emit = .no; + var 
target_arch_os_abi: []const u8 = "native"; + var target_mcpu: ?[]const u8 = null; + var target_dynamic_linker: ?[]const u8 = null; - var assembly_files = ArrayList([]const u8).init(allocator); - defer assembly_files.deinit(); - - var link_objects = ArrayList([]const u8).init(allocator); - defer link_objects.deinit(); - - var clang_argv_buf = ArrayList([]const u8).init(allocator); - defer clang_argv_buf.deinit(); - - var mllvm_flags = ArrayList([]const u8).init(allocator); - defer mllvm_flags.deinit(); - - var cur_pkg = try CliPkg.init(allocator, "", "", null); - defer cur_pkg.deinit(); - - var system_libs = ArrayList([]const u8).init(allocator); + var system_libs = std.ArrayList([]const u8).init(gpa); defer system_libs.deinit(); - var c_src_files = ArrayList([]const u8).init(allocator); - defer c_src_files.deinit(); - { var i: usize = 0; while (i < args.len) : (i += 1) { const arg = args[i]; if (mem.startsWith(u8, arg, "-")) { - if (mem.eql(u8, arg, "--help")) { + if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) { try io.getStdOut().writeAll(usage_build_generic); process.exit(0); } else if (mem.eql(u8, arg, "--color")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected [auto|on|off] after --color\n"); + std.debug.warn("expected [auto|on|off] after --color\n", .{}); process.exit(1); } i += 1; @@ -219,12 +195,12 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Co } else if (mem.eql(u8, next_arg, "off")) { color = .Off; } else { - try stderr.print("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); + std.debug.warn("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); process.exit(1); } } else if (mem.eql(u8, arg, "--mode")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode\n"); + std.debug.warn("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode\n", .{}); process.exit(1); } i += 1; @@ -238,289 +214,302 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Co } else if (mem.eql(u8, next_arg, "ReleaseSmall")) { build_mode = .ReleaseSmall; } else { - try stderr.print("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode, found '{}'\n", .{next_arg}); + std.debug.warn("expected [Debug|ReleaseSafe|ReleaseFast|ReleaseSmall] after --mode, found '{}'\n", .{next_arg}); process.exit(1); } } else if (mem.eql(u8, arg, "--name")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --name\n"); + std.debug.warn("expected parameter after --name\n", .{}); process.exit(1); } i += 1; provided_name = args[i]; - } else if (mem.eql(u8, arg, "--ver-major")) { + } else if (mem.eql(u8, arg, "--library")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --ver-major\n"); + std.debug.warn("expected parameter after --library\n", .{}); process.exit(1); } i += 1; - version.major = try std.fmt.parseInt(u32, args[i], 10); - } else if (mem.eql(u8, arg, "--ver-minor")) { + try system_libs.append(args[i]); + } else if (mem.eql(u8, arg, "--version")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --ver-minor\n"); + std.debug.warn("expected parameter after --version\n", .{}); process.exit(1); } i += 1; - version.minor = try std.fmt.parseInt(u32, args[i], 10); - } else if (mem.eql(u8, arg, "--ver-patch")) { + version = std.builtin.Version.parse(args[i]) catch |err| { + std.debug.warn("unable to parse --version '{}': {}\n", .{ args[i], @errorName(err) }); + 
process.exit(1); + }; + } else if (mem.eql(u8, arg, "-target")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --ver-patch\n"); + std.debug.warn("expected parameter after -target\n", .{}); process.exit(1); } i += 1; - version.patch = try std.fmt.parseInt(u32, args[i], 10); - } else if (mem.eql(u8, arg, "--linker-script")) { + target_arch_os_abi = args[i]; + } else if (mem.eql(u8, arg, "-mcpu")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --linker-script\n"); + std.debug.warn("expected parameter after -mcpu\n", .{}); process.exit(1); } i += 1; - linker_script = args[i]; - } else if (mem.eql(u8, arg, "--libc")) { + target_mcpu = args[i]; + } else if (mem.startsWith(u8, arg, "-mcpu=")) { + target_mcpu = arg["-mcpu=".len..]; + } else if (mem.eql(u8, arg, "--dynamic-linker")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after --libc\n"); + std.debug.warn("expected parameter after --dynamic-linker\n", .{}); process.exit(1); } i += 1; - libc_arg = args[i]; - } else if (mem.eql(u8, arg, "-mllvm")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after -mllvm\n"); - process.exit(1); - } - i += 1; - try clang_argv_buf.append("-mllvm"); - try clang_argv_buf.append(args[i]); - - try mllvm_flags.append(args[i]); - } else if (mem.eql(u8, arg, "-mmacosx-version-min")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after -mmacosx-version-min\n"); - process.exit(1); - } - i += 1; - macosx_version_min = args[i]; - } else if (mem.eql(u8, arg, "-mios-version-min")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected parameter after -mios-version-min\n"); - process.exit(1); - } - i += 1; - ios_version_min = args[i]; + target_dynamic_linker = args[i]; + } else if (mem.eql(u8, arg, "--watch")) { + watch = true; + } else if (mem.eql(u8, arg, "-ftime-report")) { + time_report = true; } else if (mem.eql(u8, arg, "-femit-bin")) { - emit_bin = true; + emit_bin = .yes_default_path; + } else if (mem.startsWith(u8, arg, "-femit-bin=")) { + emit_bin = .{ .yes = arg["-femit-bin=".len..] }; } else if (mem.eql(u8, arg, "-fno-emit-bin")) { - emit_bin = false; - } else if (mem.eql(u8, arg, "-femit-asm")) { - emit_asm = true; - } else if (mem.eql(u8, arg, "-fno-emit-asm")) { - emit_asm = false; - } else if (mem.eql(u8, arg, "-femit-llvm-ir")) { - emit_llvm_ir = true; - } else if (mem.eql(u8, arg, "-fno-emit-llvm-ir")) { - emit_llvm_ir = false; + emit_bin = .no; + } else if (mem.eql(u8, arg, "-femit-zir")) { + emit_zir = .yes_default_path; + } else if (mem.startsWith(u8, arg, "-femit-zir=")) { + emit_zir = .{ .yes = arg["-femit-zir=".len..] 
}; + } else if (mem.eql(u8, arg, "-fno-emit-zir")) { + emit_zir = .no; } else if (mem.eql(u8, arg, "-dynamic")) { - is_dynamic = true; + link_mode = .Dynamic; + } else if (mem.eql(u8, arg, "-static")) { + link_mode = .Static; } else if (mem.eql(u8, arg, "--strip")) { strip = true; - } else if (mem.eql(u8, arg, "--verbose-tokenize")) { - verbose_tokenize = true; - } else if (mem.eql(u8, arg, "--verbose-ast-tree")) { - verbose_ast_tree = true; - } else if (mem.eql(u8, arg, "--verbose-ast-fmt")) { - verbose_ast_fmt = true; - } else if (mem.eql(u8, arg, "--verbose-link")) { - verbose_link = true; - } else if (mem.eql(u8, arg, "--verbose-ir")) { - verbose_ir = true; - } else if (mem.eql(u8, arg, "--verbose-llvm-ir")) { - verbose_llvm_ir = true; - } else if (mem.eql(u8, arg, "--eh-frame-hdr")) { - link_eh_frame_hdr = true; - } else if (mem.eql(u8, arg, "--verbose-cimport")) { - verbose_cimport = true; - } else if (mem.eql(u8, arg, "-rdynamic")) { - linker_rdynamic = true; - } else if (mem.eql(u8, arg, "--pkg-begin")) { - if (i + 2 >= args.len) { - try stderr.writeAll("expected [name] [path] after --pkg-begin\n"); - process.exit(1); - } - i += 1; - const new_pkg_name = args[i]; - i += 1; - const new_pkg_path = args[i]; - - var new_cur_pkg = try CliPkg.init(allocator, new_pkg_name, new_pkg_path, cur_pkg); - try cur_pkg.children.append(new_cur_pkg); - cur_pkg = new_cur_pkg; - } else if (mem.eql(u8, arg, "--pkg-end")) { - if (cur_pkg.parent) |parent| { - cur_pkg = parent; - } else { - try stderr.writeAll("encountered --pkg-end with no matching --pkg-begin\n"); - process.exit(1); - } + } else if (mem.eql(u8, arg, "--debug-tokenize")) { + debug_tokenize = true; + } else if (mem.eql(u8, arg, "--debug-ast-tree")) { + debug_ast_tree = true; + } else if (mem.eql(u8, arg, "--debug-ast-fmt")) { + debug_ast_fmt = true; + } else if (mem.eql(u8, arg, "--debug-link")) { + debug_link = true; + } else if (mem.eql(u8, arg, "--debug-ir")) { + debug_ir = true; + } else if (mem.eql(u8, arg, "--debug-codegen")) { + debug_codegen = true; } else if (mem.startsWith(u8, arg, "-l")) { try system_libs.append(arg[2..]); } else { - try stderr.print("unrecognized parameter: '{}'", .{arg}); + std.debug.warn("unrecognized parameter: '{}'", .{arg}); process.exit(1); } - } else if (mem.endsWith(u8, arg, ".s")) { - try assembly_files.append(arg); + } else if (mem.endsWith(u8, arg, ".s") or mem.endsWith(u8, arg, ".S")) { + std.debug.warn("assembly files not supported yet", .{}); + process.exit(1); } else if (mem.endsWith(u8, arg, ".o") or mem.endsWith(u8, arg, ".obj") or mem.endsWith(u8, arg, ".a") or mem.endsWith(u8, arg, ".lib")) { - try link_objects.append(arg); + std.debug.warn("object files and static libraries not supported yet", .{}); + process.exit(1); } else if (mem.endsWith(u8, arg, ".c") or mem.endsWith(u8, arg, ".cpp")) { - try c_src_files.append(arg); - } else if (mem.endsWith(u8, arg, ".zig")) { + std.debug.warn("compilation of C and C++ source code requires LLVM extensions which are not implemented yet", .{}); + process.exit(1); + } else if (mem.endsWith(u8, arg, ".so") or + mem.endsWith(u8, arg, ".dylib") or + mem.endsWith(u8, arg, ".dll")) + { + std.debug.warn("linking against dynamic libraries not yet supported", .{}); + process.exit(1); + } else if (mem.endsWith(u8, arg, ".zig") or mem.endsWith(u8, arg, ".zir")) { if (root_src_file) |other| { - try stderr.print("found another zig file '{}' after root source file '{}'", .{ - arg, - other, - }); + std.debug.warn("found another zig file '{}' after root source file 
'{}'", .{ arg, other }); process.exit(1); } else { root_src_file = arg; } } else { - try stderr.print("unrecognized file extension of parameter '{}'", .{arg}); + std.debug.warn("unrecognized file extension of parameter '{}'", .{arg}); } } } - if (cur_pkg.parent != null) { - try stderr.print("unmatched --pkg-begin\n", .{}); - process.exit(1); - } - const root_name = if (provided_name) |n| n else blk: { if (root_src_file) |file| { const basename = fs.path.basename(file); var it = mem.split(basename, "."); break :blk it.next() orelse basename; } else { - try stderr.writeAll("--name [name] not provided and unable to infer\n"); + std.debug.warn("--name [name] not provided and unable to infer\n", .{}); process.exit(1); } }; - if (root_src_file == null and link_objects.len == 0 and assembly_files.len == 0) { - try stderr.writeAll("Expected source file argument or at least one --object or --assembly argument\n"); + if (system_libs.items.len != 0) { + std.debug.warn("linking against system libraries not yet supported", .{}); process.exit(1); } - if (out_type == Compilation.Kind.Obj and link_objects.len != 0) { - try stderr.writeAll("When building an object file, --object arguments are invalid\n"); + var diags: std.zig.CrossTarget.ParseOptions.Diagnostics = .{}; + const cross_target = std.zig.CrossTarget.parse(.{ + .arch_os_abi = target_arch_os_abi, + .cpu_features = target_mcpu, + .dynamic_linker = target_dynamic_linker, + .diagnostics = &diags, + }) catch |err| switch (err) { + error.UnknownCpuModel => { + std.debug.warn("Unknown CPU: '{}'\nAvailable CPUs for architecture '{}':\n", .{ + diags.cpu_name.?, + @tagName(diags.arch.?), + }); + for (diags.arch.?.allCpuModels()) |cpu| { + std.debug.warn(" {}\n", .{cpu.name}); + } + process.exit(1); + }, + error.UnknownCpuFeature => { + std.debug.warn( + \\Unknown CPU feature: '{}' + \\Available CPU features for architecture '{}': + \\ + , .{ + diags.unknown_feature_name, + @tagName(diags.arch.?), + }); + for (diags.arch.?.allFeaturesList()) |feature| { + std.debug.warn(" {}: {}\n", .{ feature.name, feature.description }); + } + process.exit(1); + }, + else => |e| return e, + }; + + const object_format: ?std.builtin.ObjectFormat = null; + var target_info = try std.zig.system.NativeTargetInfo.detect(gpa, cross_target); + if (target_info.cpu_detection_unimplemented) { + // TODO We want to just use detected_info.target but implementing + // CPU model & feature detection is todo so here we rely on LLVM. 
+ std.debug.warn("CPU features detection is not yet available for this system without LLVM extensions\n", .{}); process.exit(1); } - try ZigCompiler.setLlvmArgv(allocator, mllvm_flags.span()); - - const zig_lib_dir = introspect.resolveZigLibDir(allocator) catch process.exit(1); - defer allocator.free(zig_lib_dir); - - var override_libc: LibCInstallation = undefined; - - var zig_compiler = try ZigCompiler.init(allocator); - defer zig_compiler.deinit(); - - var comp = try Compilation.create( - &zig_compiler, - root_name, - root_src_file, - .{}, - out_type, - build_mode, - !is_dynamic, - zig_lib_dir, - ); - defer comp.destroy(); - - if (libc_arg) |libc_path| { - parseLibcPaths(allocator, &override_libc, libc_path); - comp.override_libc = &override_libc; - } - - for (system_libs.span()) |lib| { - _ = try comp.addLinkLib(lib, true); - } - - comp.version = version; - comp.is_test = false; - comp.linker_script = linker_script; - comp.clang_argv = clang_argv_buf.span(); - comp.strip = strip; - - comp.verbose_tokenize = verbose_tokenize; - comp.verbose_ast_tree = verbose_ast_tree; - comp.verbose_ast_fmt = verbose_ast_fmt; - comp.verbose_link = verbose_link; - comp.verbose_ir = verbose_ir; - comp.verbose_llvm_ir = verbose_llvm_ir; - comp.verbose_cimport = verbose_cimport; - - comp.link_eh_frame_hdr = link_eh_frame_hdr; - - comp.err_color = color; - - comp.linker_rdynamic = linker_rdynamic; - - if (macosx_version_min != null and ios_version_min != null) { - try stderr.writeAll("-mmacosx-version-min and -mios-version-min options not allowed together\n"); + const src_path = root_src_file orelse { + std.debug.warn("expected at least one file argument", .{}); process.exit(1); - } + }; - if (macosx_version_min) |ver| { - comp.darwin_version_min = Compilation.DarwinVersionMin{ .MacOS = ver }; - } - if (ios_version_min) |ver| { - comp.darwin_version_min = Compilation.DarwinVersionMin{ .Ios = ver }; - } + const bin_path = switch (emit_bin) { + .no => { + std.debug.warn("-fno-emit-bin not supported yet", .{}); + process.exit(1); + }, + .yes_default_path => try std.fmt.allocPrint(arena, "{}{}", .{ root_name, target_info.target.exeFileExt() }), + .yes => |p| p, + }; - comp.emit_bin = emit_bin; - comp.emit_asm = emit_asm; - comp.emit_llvm_ir = emit_llvm_ir; - comp.emit_h = emit_h; - comp.assembly_files = assembly_files.span(); - comp.link_objects = link_objects.span(); - - comp.start(); - processBuildEvents(comp, color); -} - -fn processBuildEvents(comp: *Compilation, color: errmsg.Color) void { - const stderr_file = io.getStdErr(); - const stderr = stderr_file.outStream(); - var count: usize = 0; - while (!comp.cancelled) { - const build_event = comp.events.get(); - count += 1; - - switch (build_event) { - .Ok => { - stderr.print("Build {} succeeded\n", .{count}) catch process.exit(1); - }, - .Error => |err| { - stderr.print("Build {} failed: {}\n", .{ count, @errorName(err) }) catch process.exit(1); - }, - .Fail => |msgs| { - stderr.print("Build {} compile errors:\n", .{count}) catch process.exit(1); - for (msgs) |msg| { - defer msg.destroy(); - msg.printToFile(stderr_file, color) catch process.exit(1); + const zir_out_path: ?[]const u8 = switch (emit_zir) { + .no => null, + .yes_default_path => blk: { + if (root_src_file) |rsf| { + if (mem.endsWith(u8, rsf, ".zir")) { + break :blk try std.fmt.allocPrint(arena, "{}.out.zir", .{root_name}); } - }, + } + break :blk try std.fmt.allocPrint(arena, "{}.zir", .{root_name}); + }, + .yes => |p| p, + }; + + const root_pkg = try Package.create(gpa, fs.cwd(), ".", 
src_path); + defer root_pkg.destroy(); + + var module = try Module.init(gpa, .{ + .target = target_info.target, + .output_mode = output_mode, + .root_pkg = root_pkg, + .bin_file_dir = fs.cwd(), + .bin_file_path = bin_path, + .link_mode = link_mode, + .object_format = object_format, + .optimize_mode = build_mode, + }); + defer module.deinit(); + + const stdin = std.io.getStdIn().inStream(); + const stderr = std.io.getStdErr().outStream(); + var repl_buf: [1024]u8 = undefined; + + try updateModule(gpa, &module, zir_out_path); + + while (watch) { + try stderr.print("🦎 ", .{}); + if (output_mode == .Exe) { + try module.makeBinFileExecutable(); + } + if (stdin.readUntilDelimiterOrEof(&repl_buf, '\n') catch |err| { + try stderr.print("\nUnable to parse command: {}\n", .{@errorName(err)}); + continue; + }) |line| { + if (mem.eql(u8, line, "update")) { + if (output_mode == .Exe) { + try module.makeBinFileWritable(); + } + try updateModule(gpa, &module, zir_out_path); + } else if (mem.eql(u8, line, "exit")) { + break; + } else if (mem.eql(u8, line, "help")) { + try stderr.writeAll(repl_help); + } else { + try stderr.print("unknown command: {}\n", .{line}); + } + } else { + break; } } } +fn updateModule(gpa: *Allocator, module: *Module, zir_out_path: ?[]const u8) !void { + try module.update(); + + var errors = try module.getAllErrorsAlloc(); + defer errors.deinit(module.allocator); + + if (errors.list.len != 0) { + for (errors.list) |full_err_msg| { + std.debug.warn("{}:{}:{}: error: {}\n", .{ + full_err_msg.src_path, + full_err_msg.line + 1, + full_err_msg.column + 1, + full_err_msg.msg, + }); + } + } + + if (zir_out_path) |zop| { + var new_zir_module = try zir.emit(gpa, module.*); + defer new_zir_module.deinit(gpa); + + const baf = try io.BufferedAtomicFile.create(gpa, fs.cwd(), zop, .{}); + defer baf.destroy(); + + try new_zir_module.writeToStream(gpa, baf.stream()); + + try baf.finish(); + } +} + +const repl_help = + \\Commands: + \\ update Detect changes to source files and update output files. + \\ help Print this text + \\ exit Quit this repl + \\ +; + pub const usage_fmt = \\usage: zig fmt [file]... 
\\ @@ -539,58 +528,20 @@ pub const usage_fmt = ; const Fmt = struct { - seen: event.Locked(SeenMap), + seen: SeenMap, any_error: bool, - color: errmsg.Color, - allocator: *Allocator, + color: Color, + gpa: *Allocator, - const SeenMap = std.StringHashMap(void); + const SeenMap = std.BufSet; }; -fn parseLibcPaths(allocator: *Allocator, libc: *LibCInstallation, libc_paths_file: []const u8) void { - const stderr = io.getStdErr().outStream(); - libc.* = LibCInstallation.parse(allocator, libc_paths_file, stderr) catch |err| { - stderr.print("Unable to parse libc path file '{}': {}.\n" ++ - "Try running `zig libc` to see an example for the native target.\n", .{ - libc_paths_file, - @errorName(err), - }) catch {}; - process.exit(1); - }; -} - -fn cmdLibC(allocator: *Allocator, args: []const []const u8) !void { - const stderr = io.getStdErr().outStream(); - switch (args.len) { - 0 => {}, - 1 => { - var libc_installation: LibCInstallation = undefined; - parseLibcPaths(allocator, &libc_installation, args[0]); - return; - }, - else => { - try stderr.print("unexpected extra parameter: {}\n", .{args[1]}); - process.exit(1); - }, - } - - var zig_compiler = try ZigCompiler.init(allocator); - defer zig_compiler.deinit(); - - const libc = zig_compiler.getNativeLibC() catch |err| { - stderr.print("unable to find libc: {}\n", .{@errorName(err)}) catch {}; - process.exit(1); - }; - libc.render(io.getStdOut().outStream()) catch process.exit(1); -} - -fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { +pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void { const stderr_file = io.getStdErr(); - const stderr = stderr_file.outStream(); - var color: errmsg.Color = .Auto; + var color: Color = .Auto; var stdin_flag: bool = false; var check_flag: bool = false; - var input_files = ArrayList([]const u8).init(allocator); + var input_files = ArrayList([]const u8).init(gpa); { var i: usize = 0; @@ -603,7 +554,7 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { process.exit(0); } else if (mem.eql(u8, arg, "--color")) { if (i + 1 >= args.len) { - try stderr.writeAll("expected [auto|on|off] after --color\n"); + std.debug.warn("expected [auto|on|off] after --color\n", .{}); process.exit(1); } i += 1; @@ -615,7 +566,7 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { } else if (mem.eql(u8, next_arg, "off")) { color = .Off; } else { - try stderr.print("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); + std.debug.warn("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); process.exit(1); } } else if (mem.eql(u8, arg, "--stdin")) { @@ -623,7 +574,7 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { } else if (mem.eql(u8, arg, "--check")) { check_flag = true; } else { - try stderr.print("unrecognized parameter: '{}'", .{arg}); + std.debug.warn("unrecognized parameter: '{}'", .{arg}); process.exit(1); } } else { @@ -633,60 +584,55 @@ fn cmdFmt(allocator: *Allocator, args: []const []const u8) !void { } if (stdin_flag) { - if (input_files.len != 0) { - try stderr.writeAll("cannot use --stdin with positional arguments\n"); + if (input_files.items.len != 0) { + std.debug.warn("cannot use --stdin with positional arguments\n", .{}); process.exit(1); } const stdin = io.getStdIn().inStream(); - const source_code = try stdin.readAllAlloc(allocator, max_src_size); - defer allocator.free(source_code); + const source_code = try stdin.readAllAlloc(gpa, max_src_size); + defer gpa.free(source_code); - const tree = 
std.zig.parse(allocator, source_code) catch |err| { - try stderr.print("error parsing stdin: {}\n", .{err}); + const tree = std.zig.parse(gpa, source_code) catch |err| { + std.debug.warn("error parsing stdin: {}\n", .{err}); process.exit(1); }; defer tree.deinit(); var error_it = tree.errors.iterator(0); while (error_it.next()) |parse_error| { - const msg = try errmsg.Msg.createFromParseError(allocator, parse_error, tree, ""); - defer msg.destroy(); - - try msg.printToFile(io.getStdErr(), color); + try printErrMsgToFile(gpa, parse_error, tree, "", stderr_file, color); } if (tree.errors.len != 0) { process.exit(1); } if (check_flag) { - const anything_changed = try std.zig.render(allocator, io.null_out_stream, tree); - const code: u8 = if (anything_changed) 1 else 0; + const anything_changed = try std.zig.render(gpa, io.null_out_stream, tree); + const code = if (anything_changed) @as(u8, 1) else @as(u8, 0); process.exit(code); } const stdout = io.getStdOut().outStream(); - _ = try std.zig.render(allocator, stdout, tree); + _ = try std.zig.render(gpa, stdout, tree); return; } - if (input_files.len == 0) { - try stderr.writeAll("expected at least one source file argument\n"); + if (input_files.items.len == 0) { + std.debug.warn("expected at least one source file argument\n", .{}); process.exit(1); } var fmt = Fmt{ - .allocator = allocator, - .seen = event.Locked(Fmt.SeenMap).init(Fmt.SeenMap.init(allocator)), + .gpa = gpa, + .seen = Fmt.SeenMap.init(gpa), .any_error = false, .color = color, }; - var group = event.Group(FmtError!void).init(allocator); for (input_files.span()) |file_path| { - try group.call(fmtPath, .{ &fmt, file_path, check_flag }); + try fmtPath(&fmt, file_path, check_flag); } - try group.wait(); if (fmt.any_error) { process.exit(1); } @@ -711,53 +657,45 @@ const FmtError = error{ ReadOnlyFileSystem, LinkQuotaExceeded, FileBusy, - CurrentWorkingDirectoryUnlinked, } || fs.File.OpenError; -fn fmtPath(fmt: *Fmt, file_path_ref: []const u8, check_mode: bool) callconv(.Async) FmtError!void { - const stderr_file = io.getStdErr(); - const stderr = stderr_file.outStream(); - const file_path = try std.mem.dupe(fmt.allocator, u8, file_path_ref); - defer fmt.allocator.free(file_path); +fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool) FmtError!void { + // get the real path here to avoid Windows failing on relative file paths with . or .. 
in them + var real_path = fs.realpathAlloc(fmt.gpa, file_path) catch |err| { + std.debug.warn("unable to open '{}': {}\n", .{ file_path, err }); + fmt.any_error = true; + return; + }; + defer fmt.gpa.free(real_path); - { - const held = fmt.seen.acquire(); - defer held.release(); + if (fmt.seen.exists(real_path)) return; + try fmt.seen.put(real_path); - if (try held.value.put(file_path, {})) |_| return; - } - - const source_code = fs.cwd().readFileAlloc( - fmt.allocator, - file_path, - max_src_size, - ) catch |err| switch (err) { + const source_code = fs.cwd().readFileAlloc(fmt.gpa, real_path, max_src_size) catch |err| switch (err) { error.IsDir, error.AccessDenied => { var dir = try fs.cwd().openDir(file_path, .{ .iterate = true }); defer dir.close(); - var group = event.Group(FmtError!void).init(fmt.allocator); - var it = dir.iterate(); - while (try it.next()) |entry| { + var dir_it = dir.iterate(); + + while (try dir_it.next()) |entry| { if (entry.kind == .Directory or mem.endsWith(u8, entry.name, ".zig")) { - const full_path = try fs.path.join(fmt.allocator, &[_][]const u8{ file_path, entry.name }); - @panic("TODO https://github.com/ziglang/zig/issues/3777"); - // try group.call(fmtPath, .{fmt, full_path, check_mode}); + const full_path = try fs.path.join(fmt.gpa, &[_][]const u8{ file_path, entry.name }); + try fmtPath(fmt, full_path, check_mode); } } - return group.wait(); + return; }, else => { - // TODO lock stderr printing - try stderr.print("unable to open '{}': {}\n", .{ file_path, err }); + std.debug.warn("unable to open '{}': {}\n", .{ file_path, err }); fmt.any_error = true; return; }, }; - defer fmt.allocator.free(source_code); + defer fmt.gpa.free(source_code); - const tree = std.zig.parse(fmt.allocator, source_code) catch |err| { - try stderr.print("error parsing file '{}': {}\n", .{ file_path, err }); + const tree = std.zig.parse(fmt.gpa, source_code) catch |err| { + std.debug.warn("error parsing file '{}': {}\n", .{ file_path, err }); fmt.any_error = true; return; }; @@ -765,10 +703,7 @@ fn fmtPath(fmt: *Fmt, file_path_ref: []const u8, check_mode: bool) callconv(.Asy var error_it = tree.errors.iterator(0); while (error_it.next()) |parse_error| { - const msg = try errmsg.Msg.createFromParseError(fmt.allocator, parse_error, tree, file_path); - defer fmt.allocator.destroy(msg); - - try msg.printToFile(stderr_file, fmt.color); + try printErrMsgToFile(fmt.gpa, parse_error, tree, file_path, std.io.getStdErr(), fmt.color); } if (tree.errors.len != 0) { fmt.any_error = true; @@ -776,32 +711,67 @@ fn fmtPath(fmt: *Fmt, file_path_ref: []const u8, check_mode: bool) callconv(.Asy } if (check_mode) { - const anything_changed = try std.zig.render(fmt.allocator, io.null_out_stream, tree); + const anything_changed = try std.zig.render(fmt.gpa, io.null_out_stream, tree); if (anything_changed) { - try stderr.print("{}\n", .{file_path}); + std.debug.warn("{}\n", .{file_path}); fmt.any_error = true; } } else { - // TODO make this evented - const baf = try io.BufferedAtomicFile.create(fmt.allocator, file_path); + const baf = try io.BufferedAtomicFile.create(fmt.gpa, fs.cwd(), real_path, .{}); defer baf.destroy(); - const anything_changed = try std.zig.render(fmt.allocator, baf.stream(), tree); + const anything_changed = try std.zig.render(fmt.gpa, baf.stream(), tree); if (anything_changed) { - try stderr.print("{}\n", .{file_path}); + std.debug.warn("{}\n", .{file_path}); try baf.finish(); } } } -fn cmdVersion(allocator: *Allocator, args: []const []const u8) !void { - const stdout = 
io.getStdOut().outStream(); - try stdout.print("{}\n", .{c.ZIG_VERSION_STRING}); -} +fn printErrMsgToFile( + gpa: *mem.Allocator, + parse_error: *const ast.Error, + tree: *ast.Tree, + path: []const u8, + file: fs.File, + color: Color, +) !void { + const color_on = switch (color) { + .Auto => file.isTty(), + .On => true, + .Off => false, + }; + const lok_token = parse_error.loc(); + const span_first = lok_token; + const span_last = lok_token; -fn cmdHelp(allocator: *Allocator, args: []const []const u8) !void { - const stdout = io.getStdOut(); - try stdout.writeAll(usage); + const first_token = tree.tokens.at(span_first); + const last_token = tree.tokens.at(span_last); + const start_loc = tree.tokenLocationPtr(0, first_token); + const end_loc = tree.tokenLocationPtr(first_token.end, last_token); + + var text_buf = std.ArrayList(u8).init(gpa); + defer text_buf.deinit(); + const out_stream = text_buf.outStream(); + try parse_error.render(&tree.tokens, out_stream); + const text = text_buf.span(); + + const stream = file.outStream(); + try stream.print("{}:{}:{}: error: {}\n", .{ path, start_loc.line + 1, start_loc.column + 1, text }); + + if (!color_on) return; + + // Print \r and \t as one space each so that column counts line up + for (tree.source[start_loc.line_start..start_loc.line_end]) |byte| { + try stream.writeByte(switch (byte) { + '\r', '\t' => ' ', + else => byte, + }); + } + try stream.writeByte('\n'); + try stream.writeByteNTimes(' ', start_loc.column); + try stream.writeByteNTimes('~', last_token.end - first_token.start); + try stream.writeByte('\n'); } pub const info_zen = @@ -816,90 +786,8 @@ pub const info_zen = \\ * Avoid local maximums. \\ * Reduce the amount one must remember. \\ * Minimize energy spent on coding style. + \\ * Resource deallocation must succeed. \\ * Together we serve end users. 
\\ \\ ; - -fn cmdZen(allocator: *Allocator, args: []const []const u8) !void { - try io.getStdOut().writeAll(info_zen); -} - -const usage_internal = - \\usage: zig internal [subcommand] - \\ - \\Sub-Commands: - \\ build-info Print static compiler build-info - \\ - \\ -; - -fn cmdInternal(allocator: *Allocator, args: []const []const u8) !void { - const stderr = io.getStdErr().outStream(); - if (args.len == 0) { - try stderr.writeAll(usage_internal); - process.exit(1); - } - - const sub_commands = [_]Command{Command{ - .name = "build-info", - .exec = cmdInternalBuildInfo, - }}; - - inline for (sub_commands) |sub_command| { - if (mem.eql(u8, sub_command.name, args[0])) { - var frame = try allocator.create(@Frame(sub_command.exec)); - defer allocator.destroy(frame); - frame.* = async sub_command.exec(allocator, args[1..]); - return await frame; - } - } - - try stderr.print("unknown sub command: {}\n\n", .{args[0]}); - try stderr.writeAll(usage_internal); -} - -fn cmdInternalBuildInfo(allocator: *Allocator, args: []const []const u8) !void { - const stdout = io.getStdOut().outStream(); - try stdout.print( - \\ZIG_CMAKE_BINARY_DIR {} - \\ZIG_CXX_COMPILER {} - \\ZIG_LLD_INCLUDE_PATH {} - \\ZIG_LLD_LIBRARIES {} - \\ZIG_LLVM_CONFIG_EXE {} - \\ZIG_DIA_GUIDS_LIB {} - \\ - , .{ - c.ZIG_CMAKE_BINARY_DIR, - c.ZIG_CXX_COMPILER, - c.ZIG_LLD_INCLUDE_PATH, - c.ZIG_LLD_LIBRARIES, - c.ZIG_LLVM_CONFIG_EXE, - c.ZIG_DIA_GUIDS_LIB, - }); -} - -const CliPkg = struct { - name: []const u8, - path: []const u8, - children: ArrayList(*CliPkg), - parent: ?*CliPkg, - - pub fn init(allocator: *mem.Allocator, name: []const u8, path: []const u8, parent: ?*CliPkg) !*CliPkg { - var pkg = try allocator.create(CliPkg); - pkg.* = CliPkg{ - .name = name, - .path = path, - .children = ArrayList(*CliPkg).init(allocator), - .parent = parent, - }; - return pkg; - } - - pub fn deinit(self: *CliPkg) void { - for (self.children.span()) |child| { - child.deinit(); - } - self.children.deinit(); - } -}; diff --git a/src-self-hosted/package.zig b/src-self-hosted/package.zig deleted file mode 100644 index 3111555878..0000000000 --- a/src-self-hosted/package.zig +++ /dev/null @@ -1,31 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const assert = std.debug.assert; -const ArrayListSentineled = std.ArrayListSentineled; - -pub const Package = struct { - root_src_dir: ArrayListSentineled(u8, 0), - root_src_path: ArrayListSentineled(u8, 0), - - /// relative to root_src_dir - table: Table, - - pub const Table = std.StringHashMap(*Package); - - /// makes internal copies of root_src_dir and root_src_path - /// allocator should be an arena allocator because Package never frees anything - pub fn create(allocator: *mem.Allocator, root_src_dir: []const u8, root_src_path: []const u8) !*Package { - const ptr = try allocator.create(Package); - ptr.* = Package{ - .root_src_dir = try ArrayListSentineled(u8, 0).init(allocator, root_src_dir), - .root_src_path = try ArrayListSentineled(u8, 0).init(allocator, root_src_path), - .table = Table.init(allocator), - }; - return ptr; - } - - pub fn add(self: *Package, name: []const u8, package: *Package) !void { - const entry = try self.table.put(try mem.dupe(self.table.allocator, u8, name), package); - assert(entry == null); - } -}; diff --git a/src-self-hosted/scope.zig b/src-self-hosted/scope.zig deleted file mode 100644 index c294bf8b7c..0000000000 --- a/src-self-hosted/scope.zig +++ /dev/null @@ -1,418 +0,0 @@ -const std = @import("std"); -const Allocator = mem.Allocator; -const Decl = 
@import("decl.zig").Decl; -const Compilation = @import("compilation.zig").Compilation; -const mem = std.mem; -const ast = std.zig.ast; -const Value = @import("value.zig").Value; -const Type = @import("type.zig").Type; -const ir = @import("ir.zig"); -const Span = @import("errmsg.zig").Span; -const assert = std.debug.assert; -const event = std.event; -const llvm = @import("llvm.zig"); - -pub const Scope = struct { - id: Id, - parent: ?*Scope, - ref_count: std.atomic.Int(usize), - - /// Thread-safe - pub fn ref(base: *Scope) void { - _ = base.ref_count.incr(); - } - - /// Thread-safe - pub fn deref(base: *Scope, comp: *Compilation) void { - if (base.ref_count.decr() == 1) { - if (base.parent) |parent| parent.deref(comp); - switch (base.id) { - .Root => @fieldParentPtr(Root, "base", base).destroy(comp), - .Decls => @fieldParentPtr(Decls, "base", base).destroy(comp), - .Block => @fieldParentPtr(Block, "base", base).destroy(comp), - .FnDef => @fieldParentPtr(FnDef, "base", base).destroy(comp), - .CompTime => @fieldParentPtr(CompTime, "base", base).destroy(comp), - .Defer => @fieldParentPtr(Defer, "base", base).destroy(comp), - .DeferExpr => @fieldParentPtr(DeferExpr, "base", base).destroy(comp), - .Var => @fieldParentPtr(Var, "base", base).destroy(comp), - .AstTree => @fieldParentPtr(AstTree, "base", base).destroy(comp), - } - } - } - - pub fn findRoot(base: *Scope) *Root { - var scope = base; - while (scope.parent) |parent| { - scope = parent; - } - assert(scope.id == .Root); - return @fieldParentPtr(Root, "base", scope); - } - - pub fn findFnDef(base: *Scope) ?*FnDef { - var scope = base; - while (true) { - switch (scope.id) { - .FnDef => return @fieldParentPtr(FnDef, "base", scope), - .Root, .Decls => return null, - - .Block, - .Defer, - .DeferExpr, - .CompTime, - .Var, - => scope = scope.parent.?, - - .AstTree => unreachable, - } - } - } - - pub fn findDeferExpr(base: *Scope) ?*DeferExpr { - var scope = base; - while (true) { - switch (scope.id) { - .DeferExpr => return @fieldParentPtr(DeferExpr, "base", scope), - - .FnDef, - .Decls, - => return null, - - .Block, - .Defer, - .CompTime, - .Root, - .Var, - => scope = scope.parent orelse return null, - - .AstTree => unreachable, - } - } - } - - fn init(base: *Scope, id: Id, parent: *Scope) void { - base.* = Scope{ - .id = id, - .parent = parent, - .ref_count = std.atomic.Int(usize).init(1), - }; - parent.ref(); - } - - pub const Id = enum { - Root, - AstTree, - Decls, - Block, - FnDef, - CompTime, - Defer, - DeferExpr, - Var, - }; - - pub const Root = struct { - base: Scope, - realpath: []const u8, - decls: *Decls, - - /// Creates a Root scope with 1 reference - /// Takes ownership of realpath - pub fn create(comp: *Compilation, realpath: []u8) !*Root { - const self = try comp.gpa().create(Root); - self.* = Root{ - .base = Scope{ - .id = .Root, - .parent = null, - .ref_count = std.atomic.Int(usize).init(1), - }, - .realpath = realpath, - .decls = undefined, - }; - errdefer comp.gpa().destroy(self); - self.decls = try Decls.create(comp, &self.base); - return self; - } - - pub fn destroy(self: *Root, comp: *Compilation) void { - // TODO comp.fs_watch.removeFile(self.realpath); - self.decls.base.deref(comp); - comp.gpa().free(self.realpath); - comp.gpa().destroy(self); - } - }; - - pub const AstTree = struct { - base: Scope, - tree: *ast.Tree, - - /// Creates a scope with 1 reference - /// Takes ownership of tree, will deinit and destroy when done. 
- pub fn create(comp: *Compilation, tree: *ast.Tree, root_scope: *Root) !*AstTree { - const self = try comp.gpa().create(AstTree); - self.* = AstTree{ - .base = undefined, - .tree = tree, - }; - self.base.init(.AstTree, &root_scope.base); - - return self; - } - - pub fn destroy(self: *AstTree, comp: *Compilation) void { - comp.gpa().free(self.tree.source); - self.tree.deinit(); - comp.gpa().destroy(self); - } - - pub fn root(self: *AstTree) *Root { - return self.base.findRoot(); - } - }; - - pub const Decls = struct { - base: Scope, - - /// This table remains Write Locked when the names are incomplete or possibly outdated. - /// So if a reader manages to grab a lock, it can be sure that the set of names is complete - /// and correct. - table: event.RwLocked(Decl.Table), - - /// Creates a Decls scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope) !*Decls { - const self = try comp.gpa().create(Decls); - self.* = Decls{ - .base = undefined, - .table = event.RwLocked(Decl.Table).init(Decl.Table.init(comp.gpa())), - }; - self.base.init(.Decls, parent); - return self; - } - - pub fn destroy(self: *Decls, comp: *Compilation) void { - self.table.deinit(); - comp.gpa().destroy(self); - } - }; - - pub const Block = struct { - base: Scope, - incoming_values: std.ArrayList(*ir.Inst), - incoming_blocks: std.ArrayList(*ir.BasicBlock), - end_block: *ir.BasicBlock, - is_comptime: *ir.Inst, - - safety: Safety, - - const Safety = union(enum) { - Auto, - Manual: Manual, - - const Manual = struct { - /// the source span that disabled the safety value - span: Span, - - /// whether safety is enabled - enabled: bool, - }; - - fn get(self: Safety, comp: *Compilation) bool { - return switch (self) { - .Auto => switch (comp.build_mode) { - .Debug, - .ReleaseSafe, - => true, - .ReleaseFast, - .ReleaseSmall, - => false, - }, - .Manual => |man| man.enabled, - }; - } - }; - - /// Creates a Block scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope) !*Block { - const self = try comp.gpa().create(Block); - self.* = Block{ - .base = undefined, - .incoming_values = undefined, - .incoming_blocks = undefined, - .end_block = undefined, - .is_comptime = undefined, - .safety = Safety.Auto, - }; - self.base.init(.Block, parent); - return self; - } - - pub fn destroy(self: *Block, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const FnDef = struct { - base: Scope, - - /// This reference is not counted so that the scope can get destroyed with the function - fn_val: ?*Value.Fn, - - /// Creates a FnDef scope with 1 reference - /// Must set the fn_val later - pub fn create(comp: *Compilation, parent: *Scope) !*FnDef { - const self = try comp.gpa().create(FnDef); - self.* = FnDef{ - .base = undefined, - .fn_val = null, - }; - self.base.init(.FnDef, parent); - return self; - } - - pub fn destroy(self: *FnDef, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const CompTime = struct { - base: Scope, - - /// Creates a CompTime scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope) !*CompTime { - const self = try comp.gpa().create(CompTime); - self.* = CompTime{ .base = undefined }; - self.base.init(.CompTime, parent); - return self; - } - - pub fn destroy(self: *CompTime, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const Defer = struct { - base: Scope, - defer_expr_scope: *DeferExpr, - kind: Kind, - - pub const Kind = enum { - ScopeExit, - ErrorExit, - }; - - /// Creates a Defer scope with 1 reference 
- pub fn create( - comp: *Compilation, - parent: *Scope, - kind: Kind, - defer_expr_scope: *DeferExpr, - ) !*Defer { - const self = try comp.gpa().create(Defer); - self.* = Defer{ - .base = undefined, - .defer_expr_scope = defer_expr_scope, - .kind = kind, - }; - self.base.init(.Defer, parent); - defer_expr_scope.base.ref(); - return self; - } - - pub fn destroy(self: *Defer, comp: *Compilation) void { - self.defer_expr_scope.base.deref(comp); - comp.gpa().destroy(self); - } - }; - - pub const DeferExpr = struct { - base: Scope, - expr_node: *ast.Node, - reported_err: bool, - - /// Creates a DeferExpr scope with 1 reference - pub fn create(comp: *Compilation, parent: *Scope, expr_node: *ast.Node) !*DeferExpr { - const self = try comp.gpa().create(DeferExpr); - self.* = DeferExpr{ - .base = undefined, - .expr_node = expr_node, - .reported_err = false, - }; - self.base.init(.DeferExpr, parent); - return self; - } - - pub fn destroy(self: *DeferExpr, comp: *Compilation) void { - comp.gpa().destroy(self); - } - }; - - pub const Var = struct { - base: Scope, - name: []const u8, - src_node: *ast.Node, - data: Data, - - pub const Data = union(enum) { - Param: Param, - Const: *Value, - }; - - pub const Param = struct { - index: usize, - typ: *Type, - llvm_value: *llvm.Value, - }; - - pub fn createParam( - comp: *Compilation, - parent: *Scope, - name: []const u8, - src_node: *ast.Node, - param_index: usize, - param_type: *Type, - ) !*Var { - const self = try create(comp, parent, name, src_node); - self.data = Data{ - .Param = Param{ - .index = param_index, - .typ = param_type, - .llvm_value = undefined, - }, - }; - return self; - } - - pub fn createConst( - comp: *Compilation, - parent: *Scope, - name: []const u8, - src_node: *ast.Node, - value: *Value, - ) !*Var { - const self = try create(comp, parent, name, src_node); - self.data = Data{ .Const = value }; - value.ref(); - return self; - } - - fn create(comp: *Compilation, parent: *Scope, name: []const u8, src_node: *ast.Node) !*Var { - const self = try comp.gpa().create(Var); - self.* = Var{ - .base = undefined, - .name = name, - .src_node = src_node, - .data = undefined, - }; - self.base.init(.Var, parent); - return self; - } - - pub fn destroy(self: *Var, comp: *Compilation) void { - switch (self.data) { - .Param => {}, - .Const => |value| value.deref(comp), - } - comp.gpa().destroy(self); - } - }; -}; diff --git a/src-self-hosted/stage2.zig b/src-self-hosted/stage2.zig index d93d069915..bd24ffb399 100644 --- a/src-self-hosted/stage2.zig +++ b/src-self-hosted/stage2.zig @@ -12,7 +12,6 @@ const ArrayListSentineled = std.ArrayListSentineled; const Target = std.Target; const CrossTarget = std.zig.CrossTarget; const self_hosted_main = @import("main.zig"); -const errmsg = @import("errmsg.zig"); const DepTokenizer = @import("dep_tokenizer.zig").Tokenizer; const assert = std.debug.assert; const LibCInstallation = @import("libc_installation.zig").LibCInstallation; @@ -168,8 +167,6 @@ export fn stage2_render_ast(tree: *ast.Tree, output_file: *FILE) Error { return .None; } -// TODO: just use the actual self-hosted zig fmt. Until https://github.com/ziglang/zig/issues/2377, -// we use a blocking implementation. 
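
The `stage2_fmt` export that follows is the C-ABI bridge from the stage1 compiler into this self-hosted code: an `export fn` receives C-style argc/argv, converts the null-terminated strings into Zig slices with `mem.spanZ` (as the hunk below does), and delegates to ordinary Zig code. A minimal sketch of that pattern under the same 2020-era syntax (`bridge_example` and `bridgeMain` are illustrative names, not part of the patch):

```
const std = @import("std");
const mem = std.mem;

export fn bridge_example(argc: c_int, argv: [*]const [*:0]const u8) c_int {
    bridgeMain(argc, argv) catch return -1;
    return 0;
}

fn bridgeMain(argc: c_int, argv: [*]const [*:0]const u8) !void {
    var list = std.ArrayList([]const u8).init(std.heap.page_allocator);
    defer list.deinit();
    var i: usize = 0;
    while (i < @intCast(usize, argc)) : (i += 1) {
        // mem.spanZ turns a null-terminated C string into a Zig slice.
        try list.append(mem.spanZ(argv[i]));
    }
    std.debug.warn("{} args across the C ABI\n", .{list.items.len});
}
```
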
export fn stage2_fmt(argc: c_int, argv: [*]const [*:0]const u8) c_int { if (std.debug.runtime_safety) { fmtMain(argc, argv) catch unreachable; @@ -191,258 +188,9 @@ fn fmtMain(argc: c_int, argv: [*]const [*:0]const u8) !void { try args_list.append(mem.spanZ(argv[arg_i])); } - stdout = std.io.getStdOut().outStream(); - stderr_file = std.io.getStdErr(); - stderr = stderr_file.outStream(); - const args = args_list.span()[2..]; - var color: errmsg.Color = .Auto; - var stdin_flag: bool = false; - var check_flag: bool = false; - var input_files = ArrayList([]const u8).init(allocator); - - { - var i: usize = 0; - while (i < args.len) : (i += 1) { - const arg = args[i]; - if (mem.startsWith(u8, arg, "-")) { - if (mem.eql(u8, arg, "--help")) { - try stdout.writeAll(self_hosted_main.usage_fmt); - process.exit(0); - } else if (mem.eql(u8, arg, "--color")) { - if (i + 1 >= args.len) { - try stderr.writeAll("expected [auto|on|off] after --color\n"); - process.exit(1); - } - i += 1; - const next_arg = args[i]; - if (mem.eql(u8, next_arg, "auto")) { - color = .Auto; - } else if (mem.eql(u8, next_arg, "on")) { - color = .On; - } else if (mem.eql(u8, next_arg, "off")) { - color = .Off; - } else { - try stderr.print("expected [auto|on|off] after --color, found '{}'\n", .{next_arg}); - process.exit(1); - } - } else if (mem.eql(u8, arg, "--stdin")) { - stdin_flag = true; - } else if (mem.eql(u8, arg, "--check")) { - check_flag = true; - } else { - try stderr.print("unrecognized parameter: '{}'", .{arg}); - process.exit(1); - } - } else { - try input_files.append(arg); - } - } - } - - if (stdin_flag) { - if (input_files.items.len != 0) { - try stderr.writeAll("cannot use --stdin with positional arguments\n"); - process.exit(1); - } - - const stdin_file = io.getStdIn(); - var stdin = stdin_file.inStream(); - - const source_code = try stdin.readAllAlloc(allocator, self_hosted_main.max_src_size); - defer allocator.free(source_code); - - const tree = std.zig.parse(allocator, source_code) catch |err| { - try stderr.print("error parsing stdin: {}\n", .{err}); - process.exit(1); - }; - defer tree.deinit(); - - var error_it = tree.errors.iterator(0); - while (error_it.next()) |parse_error| { - try printErrMsgToFile(allocator, parse_error, tree, "", stderr_file, color); - } - if (tree.errors.len != 0) { - process.exit(1); - } - if (check_flag) { - const anything_changed = try std.zig.render(allocator, io.null_out_stream, tree); - const code = if (anything_changed) @as(u8, 1) else @as(u8, 0); - process.exit(code); - } - - _ = try std.zig.render(allocator, stdout, tree); - return; - } - - if (input_files.items.len == 0) { - try stderr.writeAll("expected at least one source file argument\n"); - process.exit(1); - } - - var fmt = Fmt{ - .seen = Fmt.SeenMap.init(allocator), - .any_error = false, - .color = color, - .allocator = allocator, - }; - - for (input_files.span()) |file_path| { - try fmtPath(&fmt, file_path, check_flag); - } - if (fmt.any_error) { - process.exit(1); - } -} - -const FmtError = error{ - SystemResources, - OperationAborted, - IoPending, - BrokenPipe, - Unexpected, - WouldBlock, - FileClosed, - DestinationAddressRequired, - DiskQuota, - FileTooBig, - InputOutput, - NoSpaceLeft, - AccessDenied, - OutOfMemory, - RenameAcrossMountPoints, - ReadOnlyFileSystem, - LinkQuotaExceeded, - FileBusy, -} || fs.File.OpenError; - -fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool) FmtError!void { - // get the real path here to avoid Windows failing on relative file paths with . or .. 
in them - var real_path = fs.realpathAlloc(fmt.allocator, file_path) catch |err| { - try stderr.print("unable to open '{}': {}\n", .{ file_path, err }); - fmt.any_error = true; - return; - }; - defer fmt.allocator.free(real_path); - - if (fmt.seen.exists(real_path)) return; - try fmt.seen.put(real_path); - - const source_code = fs.cwd().readFileAlloc(fmt.allocator, real_path, self_hosted_main.max_src_size) catch |err| switch (err) { - error.IsDir, error.AccessDenied => { - // TODO make event based (and dir.next()) - var dir = try fs.cwd().openDir(file_path, .{ .iterate = true }); - defer dir.close(); - - var dir_it = dir.iterate(); - - while (try dir_it.next()) |entry| { - if (entry.kind == .Directory or mem.endsWith(u8, entry.name, ".zig")) { - const full_path = try fs.path.join(fmt.allocator, &[_][]const u8{ file_path, entry.name }); - try fmtPath(fmt, full_path, check_mode); - } - } - return; - }, - else => { - // TODO lock stderr printing - try stderr.print("unable to open '{}': {}\n", .{ file_path, err }); - fmt.any_error = true; - return; - }, - }; - defer fmt.allocator.free(source_code); - - const tree = std.zig.parse(fmt.allocator, source_code) catch |err| { - try stderr.print("error parsing file '{}': {}\n", .{ file_path, err }); - fmt.any_error = true; - return; - }; - defer tree.deinit(); - - var error_it = tree.errors.iterator(0); - while (error_it.next()) |parse_error| { - try printErrMsgToFile(fmt.allocator, parse_error, tree, file_path, stderr_file, fmt.color); - } - if (tree.errors.len != 0) { - fmt.any_error = true; - return; - } - - if (check_mode) { - const anything_changed = try std.zig.render(fmt.allocator, io.null_out_stream, tree); - if (anything_changed) { - try stderr.print("{}\n", .{file_path}); - fmt.any_error = true; - } - } else { - const baf = try io.BufferedAtomicFile.create(fmt.allocator, fs.cwd(), real_path, .{}); - defer baf.destroy(); - - const anything_changed = try std.zig.render(fmt.allocator, baf.stream(), tree); - if (anything_changed) { - try stderr.print("{}\n", .{file_path}); - try baf.finish(); - } - } -} - -const Fmt = struct { - seen: SeenMap, - any_error: bool, - color: errmsg.Color, - allocator: *mem.Allocator, - - const SeenMap = std.BufSet; -}; - -fn printErrMsgToFile( - allocator: *mem.Allocator, - parse_error: *const ast.Error, - tree: *ast.Tree, - path: []const u8, - file: fs.File, - color: errmsg.Color, -) !void { - const color_on = switch (color) { - .Auto => file.isTty(), - .On => true, - .Off => false, - }; - const lok_token = parse_error.loc(); - const span = errmsg.Span{ - .first = lok_token, - .last = lok_token, - }; - - const first_token = tree.tokens.at(span.first); - const last_token = tree.tokens.at(span.last); - const start_loc = tree.tokenLocationPtr(0, first_token); - const end_loc = tree.tokenLocationPtr(first_token.end, last_token); - - var text_buf = std.ArrayList(u8).init(allocator); - defer text_buf.deinit(); - const out_stream = text_buf.outStream(); - try parse_error.render(&tree.tokens, out_stream); - const text = text_buf.span(); - - const stream = file.outStream(); - try stream.print("{}:{}:{}: error: {}\n", .{ path, start_loc.line + 1, start_loc.column + 1, text }); - - if (!color_on) return; - - // Print \r and \t as one space each so that column counts line up - for (tree.source[start_loc.line_start..start_loc.line_end]) |byte| { - try stream.writeByte(switch (byte) { - '\r', '\t' => ' ', - else => byte, - }); - } - try stream.writeByte('\n'); - try stream.writeByteNTimes(' ', start_loc.column); - try 
stream.writeByteNTimes('~', last_token.end - first_token.start); - try stream.writeByte('\n'); + return self_hosted_main.cmdFmt(allocator, args); } export fn stage2_DepTokenizer_init(input: [*]const u8, len: usize) stage2_DepTokenizer { diff --git a/src-self-hosted/test.zig b/src-self-hosted/test.zig index 8186f1f4d8..451bba996a 100644 --- a/src-self-hosted/test.zig +++ b/src-self-hosted/test.zig @@ -1,17 +1,18 @@ const std = @import("std"); const link = @import("link.zig"); -const ir = @import("ir.zig"); +const Module = @import("Module.zig"); const Allocator = std.mem.Allocator; - -var global_ctx: TestContext = undefined; +const zir = @import("zir.zig"); +const Package = @import("Package.zig"); test "self-hosted" { - try global_ctx.init(); - defer global_ctx.deinit(); + var ctx: TestContext = undefined; + try ctx.init(); + defer ctx.deinit(); - try @import("stage2_tests").addCases(&global_ctx); + try @import("stage2_tests").addCases(&ctx); - try global_ctx.run(); + try ctx.run(); } pub const TestContext = struct { @@ -20,32 +21,34 @@ pub const TestContext = struct { pub const ZIRCompareOutputCase = struct { name: []const u8, - src: [:0]const u8, - expected_stdout: []const u8, + src_list: []const []const u8, + expected_stdout_list: []const []const u8, }; pub const ZIRTransformCase = struct { name: []const u8, src: [:0]const u8, expected_zir: []const u8, + cross_target: std.zig.CrossTarget, }; pub fn addZIRCompareOutput( ctx: *TestContext, name: []const u8, - src: [:0]const u8, - expected_stdout: []const u8, + src_list: []const []const u8, + expected_stdout_list: []const []const u8, ) void { ctx.zir_cmp_output_cases.append(.{ .name = name, - .src = src, - .expected_stdout = expected_stdout, + .src_list = src_list, + .expected_stdout_list = expected_stdout_list, }) catch unreachable; } pub fn addZIRTransform( ctx: *TestContext, name: []const u8, + cross_target: std.zig.CrossTarget, src: [:0]const u8, expected_zir: []const u8, ) void { @@ -53,6 +56,7 @@ pub const TestContext = struct { .name = name, .src = src, .expected_zir = expected_zir, + .cross_target = cross_target, }) catch unreachable; } @@ -84,7 +88,8 @@ pub const TestContext = struct { } for (self.zir_transform_cases.items) |case| { std.testing.base_allocator_instance.reset(); - try self.runOneZIRTransformCase(std.testing.allocator, root_node, case, native_info.target); + const info = try std.zig.system.NativeTargetInfo.detect(std.testing.allocator, case.cross_target); + try self.runOneZIRTransformCase(std.testing.allocator, root_node, case, info.target); try std.testing.allocator_instance.validate(); } } @@ -99,77 +104,68 @@ pub const TestContext = struct { var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); - var prg_node = root_node.start(case.name, 4); + const tmp_src_path = "test-case.zir"; + const root_pkg = try Package.create(allocator, tmp.dir, ".", tmp_src_path); + defer root_pkg.destroy(); + + var prg_node = root_node.start(case.name, case.src_list.len); prg_node.activate(); defer prg_node.end(); - var zir_module = x: { - var parse_node = prg_node.start("parse", null); - parse_node.activate(); - defer parse_node.end(); + var module = try Module.init(allocator, .{ + .target = target, + .output_mode = .Exe, + .optimize_mode = .Debug, + .bin_file_dir = tmp.dir, + .bin_file_path = "a.out", + .root_pkg = root_pkg, + }); + defer module.deinit(); - break :x try ir.text.parse(allocator, case.src); - }; - defer zir_module.deinit(allocator); - if (zir_module.errors.len != 0) { - debugPrintErrors(case.src, zir_module.errors); - 
return error.ParseFailure; + for (case.src_list) |source, i| { + var src_node = prg_node.start("update", 2); + src_node.activate(); + defer src_node.end(); + + try tmp.dir.writeFile(tmp_src_path, source); + + var update_node = src_node.start("parse,analysis,codegen", null); + update_node.activate(); + try module.makeBinFileWritable(); + try module.update(); + update_node.end(); + + var exec_result = x: { + var exec_node = src_node.start("execute", null); + exec_node.activate(); + defer exec_node.end(); + + try module.makeBinFileExecutable(); + break :x try std.ChildProcess.exec(.{ + .allocator = allocator, + .argv = &[_][]const u8{"./a.out"}, + .cwd_dir = tmp.dir, + }); + }; + defer allocator.free(exec_result.stdout); + defer allocator.free(exec_result.stderr); + switch (exec_result.term) { + .Exited => |code| { + if (code != 0) { + std.debug.warn("elf file exited with code {}\n", .{code}); + return error.BinaryBadExitCode; + } + }, + else => return error.BinaryCrashed, + } + const expected_stdout = case.expected_stdout_list[i]; + if (!std.mem.eql(u8, expected_stdout, exec_result.stdout)) { + std.debug.panic( + "update index {}, mismatched stdout\n====Expected (len={}):====\n{}\n====Actual (len={}):====\n{}\n========\n", + .{ i, expected_stdout.len, expected_stdout, exec_result.stdout.len, exec_result.stdout }, + ); + } } - - var analyzed_module = x: { - var analyze_node = prg_node.start("analyze", null); - analyze_node.activate(); - defer analyze_node.end(); - - break :x try ir.analyze(allocator, zir_module, .{ - .target = target, - .output_mode = .Exe, - .link_mode = .Static, - .optimize_mode = .Debug, - }); - }; - defer analyzed_module.deinit(allocator); - if (analyzed_module.errors.len != 0) { - debugPrintErrors(case.src, analyzed_module.errors); - return error.ParseFailure; - } - - var link_result = x: { - var link_node = prg_node.start("link", null); - link_node.activate(); - defer link_node.end(); - - break :x try link.updateFilePath(allocator, analyzed_module, tmp.dir, "a.out"); - }; - defer link_result.deinit(allocator); - if (link_result.errors.len != 0) { - debugPrintErrors(case.src, link_result.errors); - return error.LinkFailure; - } - - var exec_result = x: { - var exec_node = prg_node.start("execute", null); - exec_node.activate(); - defer exec_node.end(); - - break :x try std.ChildProcess.exec(.{ - .allocator = allocator, - .argv = &[_][]const u8{"./a.out"}, - .cwd_dir = tmp.dir, - }); - }; - defer allocator.free(exec_result.stdout); - defer allocator.free(exec_result.stderr); - switch (exec_result.term) { - .Exited => |code| { - if (code != 0) { - std.debug.warn("elf file exited with code {}\n", .{code}); - return error.BinaryBadExitCode; - } - }, - else => return error.BinaryCrashed, - } - std.testing.expectEqualSlices(u8, case.expected_stdout, exec_result.stdout); } fn runOneZIRTransformCase( @@ -179,38 +175,37 @@ pub const TestContext = struct { case: ZIRTransformCase, target: std.Target, ) !void { - var prg_node = root_node.start(case.name, 4); + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + var prg_node = root_node.start(case.name, 3); prg_node.activate(); defer prg_node.end(); - var parse_node = prg_node.start("parse", null); - parse_node.activate(); - var zir_module = try ir.text.parse(allocator, case.src); - defer zir_module.deinit(allocator); - if (zir_module.errors.len != 0) { - debugPrintErrors(case.src, zir_module.errors); - return error.ParseFailure; - } - parse_node.end(); + const tmp_src_path = "test-case.zir"; + try 
tmp.dir.writeFile(tmp_src_path, case.src); - var analyze_node = prg_node.start("analyze", null); - analyze_node.activate(); - var analyzed_module = try ir.analyze(allocator, zir_module, .{ + const root_pkg = try Package.create(allocator, tmp.dir, ".", tmp_src_path); + defer root_pkg.destroy(); + + var module = try Module.init(allocator, .{ .target = target, .output_mode = .Obj, - .link_mode = .Static, .optimize_mode = .Debug, + .bin_file_dir = tmp.dir, + .bin_file_path = "test-case.o", + .root_pkg = root_pkg, }); - defer analyzed_module.deinit(allocator); - if (analyzed_module.errors.len != 0) { - debugPrintErrors(case.src, analyzed_module.errors); - return error.ParseFailure; - } - analyze_node.end(); + defer module.deinit(); + + var module_node = prg_node.start("parse/analysis/codegen", null); + module_node.activate(); + try module.update(); + module_node.end(); var emit_node = prg_node.start("emit", null); emit_node.activate(); - var new_zir_module = try ir.text.emit_zir(allocator, analyzed_module); + var new_zir_module = try zir.emit(allocator, module); defer new_zir_module.deinit(allocator); emit_node.end(); diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 25f726a680..84f1ed852d 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -5,8 +5,7 @@ const Allocator = std.mem.Allocator; const Target = std.Target; /// This is the raw data, with no bookkeeping, no memory awareness, no de-duplication. -/// It's important for this struct to be small. -/// It is not copyable since it may contain references to its inner data. +/// It's important for this type to be small. /// Types are not de-duplicated, which helps with multi-threading since it obviates the requirement /// of obtaining a lock on a global type table, as well as making the /// garbage collection bookkeeping simpler. 
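Note: the "small" requirement above is what makes the tag-or-pointer representation work. Both `Type` and `Value` are word-sized `extern union`s: an integer below the page size is interpreted as a payload-less tag, and anything else is a pointer to out-of-line payload data. A minimal sketch of the pattern, with illustrative names (the real definitions live in type.zig and value.zig):

```
const std = @import("std");

// Sketch of the tag-or-pointer pattern: the first page of memory is
// unmapped, so a value below the page size can never be a valid
// payload pointer and is read as a tag instead.
pub const Small = extern union {
    tag_if_small_enough: usize,
    ptr_otherwise: *Payload,

    pub const Tag = enum(usize) {
        u8_type,
        bool_type,
        // ... every payload-less tag must stay < std.mem.page_size
    };

    pub const Payload = struct {
        tag: Tag,
        // payload-carrying variants embed this struct as their base
    };

    pub fn initTag(small_tag: Tag) Small {
        return Small{ .tag_if_small_enough = @enumToInt(small_tag) };
    }

    pub fn tag(self: Small) Tag {
        if (self.tag_if_small_enough < std.mem.page_size) {
            return @intToEnum(Tag, self.tag_if_small_enough);
        }
        return self.ptr_otherwise.tag;
    }
};
```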
@@ -51,7 +50,9 @@ pub const Type = extern union { .comptime_int => return .ComptimeInt, .comptime_float => return .ComptimeFloat, .noreturn => return .NoReturn, + .@"null" => return .Null, + .fn_noreturn_no_args => return .Fn, .fn_naked_noreturn_no_args => return .Fn, .fn_ccc_void_no_args => return .Fn, @@ -183,7 +184,10 @@ pub const Type = extern union { .noreturn, => return out_stream.writeAll(@tagName(t)), + .@"null" => return out_stream.writeAll("@TypeOf(null)"), + .const_slice_u8 => return out_stream.writeAll("[]const u8"), + .fn_noreturn_no_args => return out_stream.writeAll("fn() noreturn"), .fn_naked_noreturn_no_args => return out_stream.writeAll("fn() callconv(.Naked) noreturn"), .fn_ccc_void_no_args => return out_stream.writeAll("fn() callconv(.C) void"), .single_const_pointer_to_comptime_int => return out_stream.writeAll("*const comptime_int"), @@ -244,6 +248,8 @@ pub const Type = extern union { .comptime_int => return Value.initTag(.comptime_int_type), .comptime_float => return Value.initTag(.comptime_float_type), .noreturn => return Value.initTag(.noreturn_type), + .@"null" => return Value.initTag(.null_type), + .fn_noreturn_no_args => return Value.initTag(.fn_noreturn_no_args_type), .fn_naked_noreturn_no_args => return Value.initTag(.fn_naked_noreturn_no_args_type), .fn_ccc_void_no_args => return Value.initTag(.fn_ccc_void_no_args_type), .single_const_pointer_to_comptime_int => return Value.initTag(.single_const_pointer_to_comptime_int_type), @@ -256,6 +262,110 @@ pub const Type = extern union { } } + pub fn hasCodeGenBits(self: Type) bool { + return switch (self.tag()) { + .u8, + .i8, + .isize, + .usize, + .c_short, + .c_ushort, + .c_int, + .c_uint, + .c_long, + .c_ulong, + .c_longlong, + .c_ulonglong, + .c_longdouble, + .f16, + .f32, + .f64, + .f128, + .bool, + .anyerror, + .fn_noreturn_no_args, + .fn_naked_noreturn_no_args, + .fn_ccc_void_no_args, + .single_const_pointer_to_comptime_int, + .const_slice_u8, + .array_u8_sentinel_0, + .array, // TODO check for zero bits + .single_const_pointer, + .int_signed, // TODO check for zero bits + .int_unsigned, // TODO check for zero bits + => true, + + .c_void, + .void, + .type, + .comptime_int, + .comptime_float, + .noreturn, + .@"null", + => false, + }; + } + + /// Asserts that hasCodeGenBits() is true. 
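Note: `hasCodeGenBits` is the gate that separates runtime types from comptime-only ones; callers such as `abiAlignment` below assert it first. The predicate mirrors whether a value of the type occupies any runtime storage at all, which can be checked in the language itself (a runnable sketch, not part of the patch):

```
const std = @import("std");

// Comptime-only and zero-bit types occupy no runtime storage, so the
// compiler has nothing to emit for them; u8 and friends do.
test "zero-bit types have no codegen bits" {
    std.debug.assert(@sizeOf(void) == 0);
    std.debug.assert(@sizeOf(comptime_int) == 0);
    std.debug.assert(@sizeOf(u8) == 1);
}
```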
+ pub fn abiAlignment(self: Type, target: Target) u32 { + return switch (self.tag()) { + .u8, + .i8, + .bool, + .fn_noreturn_no_args, // represents machine code; not a pointer + .fn_naked_noreturn_no_args, // represents machine code; not a pointer + .fn_ccc_void_no_args, // represents machine code; not a pointer + .array_u8_sentinel_0, + => return 1, + + .isize, + .usize, + .single_const_pointer_to_comptime_int, + .const_slice_u8, + .single_const_pointer, + => return @divExact(target.cpu.arch.ptrBitWidth(), 8), + + .c_short => return @divExact(CType.short.sizeInBits(target), 8), + .c_ushort => return @divExact(CType.ushort.sizeInBits(target), 8), + .c_int => return @divExact(CType.int.sizeInBits(target), 8), + .c_uint => return @divExact(CType.uint.sizeInBits(target), 8), + .c_long => return @divExact(CType.long.sizeInBits(target), 8), + .c_ulong => return @divExact(CType.ulong.sizeInBits(target), 8), + .c_longlong => return @divExact(CType.longlong.sizeInBits(target), 8), + .c_ulonglong => return @divExact(CType.ulonglong.sizeInBits(target), 8), + + .f16 => return 2, + .f32 => return 4, + .f64 => return 8, + .f128 => return 16, + .c_longdouble => return 16, + + .anyerror => return 2, // TODO revisit this when we have the concept of the error tag type + + .array => return self.cast(Payload.Array).?.elem_type.abiAlignment(target), + + .int_signed, .int_unsigned => { + const bits: u16 = if (self.cast(Payload.IntSigned)) |pl| + pl.bits + else if (self.cast(Payload.IntUnsigned)) |pl| + pl.bits + else + unreachable; + + return std.math.ceilPowerOfTwoPromote(u16, (bits + 7) / 8); + }, + + .c_void, + .void, + .type, + .comptime_int, + .comptime_float, + .noreturn, + .@"null", + => unreachable, + }; + } + pub fn isSinglePointer(self: Type) bool { return switch (self.tag()) { .u8, @@ -283,9 +393,11 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .array_u8_sentinel_0, .const_slice_u8, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -325,10 +437,12 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .array_u8_sentinel_0, .single_const_pointer, .single_const_pointer_to_comptime_int, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -367,8 +481,10 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .array_u8_sentinel_0, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -410,6 +526,8 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .int_unsigned, @@ -451,6 +569,8 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .single_const_pointer, @@ -465,6 +585,50 @@ pub const Type = extern union { }; } + /// Asserts the type is an array or vector. 
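Note: for the arbitrary-width `int_signed`/`int_unsigned` case, the alignment rule above is "storage size rounded up to a power of two". A worked check of that formula (runnable; not part of the patch):

```
const std = @import("std");

// A u21 needs ceil(21 / 8) = 3 bytes of storage; its ABI alignment is
// that size rounded up to the next power of two, i.e. 4.
test "abi alignment rule for u21" {
    const bits: u16 = 21;
    const byte_size = (bits + 7) / 8;
    std.debug.assert(byte_size == 3);
    std.debug.assert(std.math.ceilPowerOfTwoPromote(u16, byte_size) == 4);
}
```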
+ pub fn arraySentinel(self: Type) ?Value { + return switch (self.tag()) { + .u8, + .i8, + .isize, + .usize, + .c_short, + .c_ushort, + .c_int, + .c_uint, + .c_long, + .c_ulong, + .c_longlong, + .c_ulonglong, + .c_longdouble, + .f16, + .f32, + .f64, + .f128, + .c_void, + .bool, + .void, + .type, + .anyerror, + .comptime_int, + .comptime_float, + .noreturn, + .@"null", + .fn_noreturn_no_args, + .fn_naked_noreturn_no_args, + .fn_ccc_void_no_args, + .single_const_pointer, + .single_const_pointer_to_comptime_int, + .const_slice_u8, + .int_unsigned, + .int_signed, + => unreachable, + + .array => return null, + .array_u8_sentinel_0 => return Value.initTag(.zero), + }; + } + /// Returns true if and only if the type is a fixed-width, signed integer. pub fn isSignedInt(self: Type) bool { return switch (self.tag()) { @@ -481,6 +645,8 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .array, @@ -524,6 +690,8 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .array, @@ -579,6 +747,7 @@ pub const Type = extern union { /// Asserts the type is a function. pub fn fnParamLen(self: Type) usize { return switch (self.tag()) { + .fn_noreturn_no_args => 0, .fn_naked_noreturn_no_args => 0, .fn_ccc_void_no_args => 0, @@ -595,6 +764,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -622,6 +792,7 @@ pub const Type = extern union { /// given by `fnParamLen`. pub fn fnParamTypes(self: Type, types: []Type) void { switch (self.tag()) { + .fn_noreturn_no_args => return, .fn_naked_noreturn_no_args => return, .fn_ccc_void_no_args => return, @@ -638,6 +809,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -664,6 +836,7 @@ pub const Type = extern union { /// Asserts the type is a function. pub fn fnReturnType(self: Type) Type { return switch (self.tag()) { + .fn_noreturn_no_args => Type.initTag(.noreturn), .fn_naked_noreturn_no_args => Type.initTag(.noreturn), .fn_ccc_void_no_args => Type.initTag(.void), @@ -680,6 +853,7 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -706,6 +880,7 @@ pub const Type = extern union { /// Asserts the type is a function. pub fn fnCallingConvention(self: Type) std.builtin.CallingConvention { return switch (self.tag()) { + .fn_noreturn_no_args => .Unspecified, .fn_naked_noreturn_no_args => .Naked, .fn_ccc_void_no_args => .C, @@ -722,6 +897,51 @@ pub const Type = extern union { .comptime_int, .comptime_float, .noreturn, + .@"null", + .array, + .single_const_pointer, + .single_const_pointer_to_comptime_int, + .array_u8_sentinel_0, + .const_slice_u8, + .u8, + .i8, + .usize, + .isize, + .c_short, + .c_ushort, + .c_int, + .c_uint, + .c_long, + .c_ulong, + .c_longlong, + .c_ulonglong, + .int_unsigned, + .int_signed, + => unreachable, + }; + } + + /// Asserts the type is a function. 
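Note: in this minimal type set, `array_u8_sentinel_0` is the only array shape that reports a sentinel, and the sentinel it returns is the zero value. That matches how string literals behave in the language (runnable illustration, not part of the patch):

```
const std = @import("std");

// String literals have type *const [N:0]u8: a sentinel-terminated
// array whose sentinel is 0, readable one past the last element.
test "string literals carry a zero sentinel" {
    const msg = "hi"; // *const [2:0]u8
    std.debug.assert(msg.len == 2);
    std.debug.assert(msg[2] == 0);
}
```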
+ pub fn fnIsVarArgs(self: Type) bool { + return switch (self.tag()) { + .fn_noreturn_no_args => false, + .fn_naked_noreturn_no_args => false, + .fn_ccc_void_no_args => false, + + .f16, + .f32, + .f64, + .f128, + .c_longdouble, + .c_void, + .bool, + .void, + .type, + .anyerror, + .comptime_int, + .comptime_float, + .noreturn, + .@"null", .array, .single_const_pointer, .single_const_pointer_to_comptime_int, @@ -776,6 +996,8 @@ pub const Type = extern union { .type, .anyerror, .noreturn, + .@"null", + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .array, @@ -812,6 +1034,7 @@ pub const Type = extern union { .bool, .type, .anyerror, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .single_const_pointer_to_comptime_int, @@ -822,6 +1045,7 @@ pub const Type = extern union { .c_void, .void, .noreturn, + .@"null", => return true, .int_unsigned => return ty.cast(Payload.IntUnsigned).?.bits == 0, @@ -865,6 +1089,7 @@ pub const Type = extern union { .bool, .type, .anyerror, + .fn_noreturn_no_args, .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .single_const_pointer_to_comptime_int, @@ -873,6 +1098,7 @@ pub const Type = extern union { .c_void, .void, .noreturn, + .@"null", .int_unsigned, .int_signed, .array, @@ -902,11 +1128,11 @@ pub const Type = extern union { c_longlong, c_ulonglong, c_longdouble, - c_void, f16, f32, f64, f128, + c_void, bool, void, type, @@ -914,6 +1140,8 @@ pub const Type = extern union { comptime_int, comptime_float, noreturn, + @"null", + fn_noreturn_no_args, fn_naked_noreturn_no_args, fn_ccc_void_no_args, single_const_pointer_to_comptime_int, diff --git a/src-self-hosted/util.zig b/src-self-hosted/util.zig deleted file mode 100644 index 6585fd7c6f..0000000000 --- a/src-self-hosted/util.zig +++ /dev/null @@ -1,47 +0,0 @@ -const std = @import("std"); -const Target = std.Target; -const llvm = @import("llvm.zig"); - -pub fn getDarwinArchString(self: Target) [:0]const u8 { - switch (self.cpu.arch) { - .aarch64 => return "arm64", - .thumb, - .arm, - => return "arm", - .powerpc => return "ppc", - .powerpc64 => return "ppc64", - .powerpc64le => return "ppc64le", - // @tagName should be able to return sentinel terminated slice - else => @panic("TODO https://github.com/ziglang/zig/issues/3779"), //return @tagName(arch), - } -} - -pub fn llvmTargetFromTriple(triple: [:0]const u8) !*llvm.Target { - var result: *llvm.Target = undefined; - var err_msg: [*:0]u8 = undefined; - if (llvm.GetTargetFromTriple(triple, &result, &err_msg) != 0) { - std.debug.warn("triple: {s} error: {s}\n", .{ triple, err_msg }); - return error.UnsupportedTarget; - } - return result; -} - -pub fn initializeAllTargets() void { - llvm.InitializeAllTargets(); - llvm.InitializeAllTargetInfos(); - llvm.InitializeAllTargetMCs(); - llvm.InitializeAllAsmPrinters(); - llvm.InitializeAllAsmParsers(); -} - -pub fn getLLVMTriple(allocator: *std.mem.Allocator, target: std.Target) ![:0]u8 { - var result = try std.ArrayListSentineled(u8, 0).initSize(allocator, 0); - defer result.deinit(); - - try result.outStream().print( - "{}-unknown-{}-{}", - .{ @tagName(target.cpu.arch), @tagName(target.os.tag), @tagName(target.abi) }, - ); - - return result.toOwnedSlice(); -} diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 3d04e6e813..df438360c8 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -6,10 +6,11 @@ const BigIntConst = std.math.big.int.Const; const BigIntMutable = std.math.big.int.Mutable; const Target = std.Target; const Allocator 
= std.mem.Allocator; +const Module = @import("Module.zig"); /// This is the raw data, with no bookkeeping, no memory awareness, /// no de-duplication, and no type system awareness. -/// It's important for this struct to be small. +/// It's important for this type to be small. /// This union takes advantage of the fact that the first page of memory /// is unmapped, giving us 4096 possible enum tags that have no payload. pub const Value = extern union { @@ -45,6 +46,8 @@ pub const Value = extern union { comptime_int_type, comptime_float_type, noreturn_type, + null_type, + fn_noreturn_no_args_type, fn_naked_noreturn_no_args_type, fn_ccc_void_no_args_type, single_const_pointer_to_comptime_int_type, @@ -64,8 +67,9 @@ pub const Value = extern union { int_big_positive, int_big_negative, function, - ref, ref_val, + decl_ref, + elem_ptr, bytes, repeated, // the value is a value repeated some number of times @@ -136,6 +140,8 @@ pub const Value = extern union { .comptime_int_type => return out_stream.writeAll("comptime_int"), .comptime_float_type => return out_stream.writeAll("comptime_float"), .noreturn_type => return out_stream.writeAll("noreturn"), + .null_type => return out_stream.writeAll("@TypeOf(null)"), + .fn_noreturn_no_args_type => return out_stream.writeAll("fn() noreturn"), .fn_naked_noreturn_no_args_type => return out_stream.writeAll("fn() callconv(.Naked) noreturn"), .fn_ccc_void_no_args_type => return out_stream.writeAll("fn() callconv(.C) void"), .single_const_pointer_to_comptime_int_type => return out_stream.writeAll("*const comptime_int"), @@ -153,11 +159,16 @@ pub const Value = extern union { .int_big_positive => return out_stream.print("{}", .{val.cast(Payload.IntBigPositive).?.asBigInt()}), .int_big_negative => return out_stream.print("{}", .{val.cast(Payload.IntBigNegative).?.asBigInt()}), .function => return out_stream.writeAll("(function)"), - .ref => return out_stream.writeAll("(ref)"), .ref_val => { - try out_stream.writeAll("*const "); - val = val.cast(Payload.RefVal).?.val; - continue; + const ref_val = val.cast(Payload.RefVal).?; + try out_stream.writeAll("&const "); + val = ref_val.val; + }, + .decl_ref => return out_stream.writeAll("(decl ref)"), + .elem_ptr => { + const elem_ptr = val.cast(Payload.ElemPtr).?; + try out_stream.print("&[{}] ", .{elem_ptr.index}); + val = elem_ptr.array_ptr; }, .bytes => return std.zig.renderStringLiteral(self.cast(Payload.Bytes).?.data, out_stream), .repeated => { @@ -169,10 +180,17 @@ pub const Value = extern union { /// Asserts that the value is representable as an array of bytes. /// Copies the value into a freshly allocated slice of memory, which is owned by the caller. 
- pub fn toAllocatedBytes(self: Value, allocator: *Allocator) Allocator.Error![]u8 { + pub fn toAllocatedBytes(self: Value, allocator: *Allocator) ![]u8 { if (self.cast(Payload.Bytes)) |bytes| { return std.mem.dupe(allocator, u8, bytes.data); } + if (self.cast(Payload.Repeated)) |repeated| { + @panic("TODO implement toAllocatedBytes for this Value tag"); + } + if (self.cast(Payload.DeclRef)) |declref| { + const val = try declref.decl.value(); + return val.toAllocatedBytes(allocator); + } unreachable; } @@ -181,31 +199,33 @@ pub const Value = extern union { return switch (self.tag()) { .ty => self.cast(Payload.Ty).?.ty, - .u8_type => Type.initTag(.@"u8"), - .i8_type => Type.initTag(.@"i8"), - .isize_type => Type.initTag(.@"isize"), - .usize_type => Type.initTag(.@"usize"), - .c_short_type => Type.initTag(.@"c_short"), - .c_ushort_type => Type.initTag(.@"c_ushort"), - .c_int_type => Type.initTag(.@"c_int"), - .c_uint_type => Type.initTag(.@"c_uint"), - .c_long_type => Type.initTag(.@"c_long"), - .c_ulong_type => Type.initTag(.@"c_ulong"), - .c_longlong_type => Type.initTag(.@"c_longlong"), - .c_ulonglong_type => Type.initTag(.@"c_ulonglong"), - .c_longdouble_type => Type.initTag(.@"c_longdouble"), - .f16_type => Type.initTag(.@"f16"), - .f32_type => Type.initTag(.@"f32"), - .f64_type => Type.initTag(.@"f64"), - .f128_type => Type.initTag(.@"f128"), - .c_void_type => Type.initTag(.@"c_void"), - .bool_type => Type.initTag(.@"bool"), - .void_type => Type.initTag(.@"void"), - .type_type => Type.initTag(.@"type"), - .anyerror_type => Type.initTag(.@"anyerror"), - .comptime_int_type => Type.initTag(.@"comptime_int"), - .comptime_float_type => Type.initTag(.@"comptime_float"), - .noreturn_type => Type.initTag(.@"noreturn"), + .u8_type => Type.initTag(.u8), + .i8_type => Type.initTag(.i8), + .isize_type => Type.initTag(.isize), + .usize_type => Type.initTag(.usize), + .c_short_type => Type.initTag(.c_short), + .c_ushort_type => Type.initTag(.c_ushort), + .c_int_type => Type.initTag(.c_int), + .c_uint_type => Type.initTag(.c_uint), + .c_long_type => Type.initTag(.c_long), + .c_ulong_type => Type.initTag(.c_ulong), + .c_longlong_type => Type.initTag(.c_longlong), + .c_ulonglong_type => Type.initTag(.c_ulonglong), + .c_longdouble_type => Type.initTag(.c_longdouble), + .f16_type => Type.initTag(.f16), + .f32_type => Type.initTag(.f32), + .f64_type => Type.initTag(.f64), + .f128_type => Type.initTag(.f128), + .c_void_type => Type.initTag(.c_void), + .bool_type => Type.initTag(.bool), + .void_type => Type.initTag(.void), + .type_type => Type.initTag(.type), + .anyerror_type => Type.initTag(.anyerror), + .comptime_int_type => Type.initTag(.comptime_int), + .comptime_float_type => Type.initTag(.comptime_float), + .noreturn_type => Type.initTag(.noreturn), + .null_type => Type.initTag(.@"null"), + .fn_noreturn_no_args_type => Type.initTag(.fn_noreturn_no_args), .fn_naked_noreturn_no_args_type => Type.initTag(.fn_naked_noreturn_no_args), .fn_ccc_void_no_args_type => Type.initTag(.fn_ccc_void_no_args), .single_const_pointer_to_comptime_int_type => Type.initTag(.single_const_pointer_to_comptime_int), @@ -222,8 +242,9 @@ pub const Value = extern union { .int_big_positive, .int_big_negative, .function, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, => unreachable, @@ -259,6 +280,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, 
.single_const_pointer_to_comptime_int_type, @@ -267,8 +290,9 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .undef, .repeated, @@ -314,6 +338,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -322,8 +348,9 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .undef, .repeated, @@ -370,6 +397,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -378,8 +407,9 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .undef, .repeated, @@ -431,6 +461,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -439,8 +471,9 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, => unreachable, @@ -521,6 +554,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -529,8 +564,9 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, .undef, @@ -573,6 +609,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -581,8 +619,9 @@ pub const Value = extern union { .bool_false, .null_value, .function, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, .undef, @@ -636,7 +675,8 @@ pub const Value = extern union { } /// Asserts the value is a pointer and dereferences it. - pub fn pointerDeref(self: Value) Value { + /// Returns error.AnalysisFail if the pointer points to a Decl that failed semantic analysis. 
+ pub fn pointerDeref(self: Value, allocator: *Allocator) error{ AnalysisFail, OutOfMemory }!Value { return switch (self.tag()) { .ty, .u8_type, @@ -664,6 +704,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -683,14 +725,19 @@ pub const Value = extern union { => unreachable, .the_one_possible_value => Value.initTag(.the_one_possible_value), - .ref => self.cast(Payload.Ref).?.cell.contents, .ref_val => self.cast(Payload.RefVal).?.val, + .decl_ref => self.cast(Payload.DeclRef).?.decl.value(), + .elem_ptr => { + const elem_ptr = self.cast(Payload.ElemPtr).?; + const array_val = try elem_ptr.array_ptr.pointerDeref(allocator); + return array_val.elemValue(allocator, elem_ptr.index); + }, }; } /// Asserts the value is a single-item pointer to an array, or an array, /// or an unknown-length pointer, and returns the element value at the index. - pub fn elemValueAt(self: Value, allocator: *Allocator, index: usize) Allocator.Error!Value { + pub fn elemValue(self: Value, allocator: *Allocator, index: usize) error{OutOfMemory}!Value { switch (self.tag()) { .ty, .u8_type, @@ -718,6 +765,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -733,13 +782,13 @@ pub const Value = extern union { .int_big_positive, .int_big_negative, .undef, + .elem_ptr, + .ref_val, + .decl_ref, => unreachable, - .ref => @panic("TODO figure out how MemoryCell works"), - .ref_val => @panic("TODO figure out how MemoryCell works"), - .bytes => { - const int_payload = try allocator.create(Value.Payload.Int_u64); + const int_payload = try allocator.create(Payload.Int_u64); int_payload.* = .{ .int = self.cast(Payload.Bytes).?.data[index] }; return Value.initPayload(&int_payload.base); }, @@ -749,6 +798,17 @@ pub const Value = extern union { } } + /// Returns a pointer to the element value at the index. + pub fn elemPtr(self: Value, allocator: *Allocator, index: usize) !Value { + const payload = try allocator.create(Payload.ElemPtr); + if (self.cast(Payload.ElemPtr)) |elem_ptr| { + payload.* = .{ .array_ptr = elem_ptr.array_ptr, .index = elem_ptr.index + index }; + } else { + payload.* = .{ .array_ptr = self, .index = index }; + } + return Value.initPayload(&payload.base); + } + pub fn isUndef(self: Value) bool { return self.tag() == .undef; } @@ -783,6 +843,8 @@ pub const Value = extern union { .comptime_int_type, .comptime_float_type, .noreturn_type, + .null_type, + .fn_noreturn_no_args_type, .fn_naked_noreturn_no_args_type, .fn_ccc_void_no_args_type, .single_const_pointer_to_comptime_int_type, @@ -796,8 +858,9 @@ pub const Value = extern union { .int_i64, .int_big_positive, .int_big_negative, - .ref, .ref_val, + .decl_ref, + .elem_ptr, .bytes, .repeated, => false, @@ -841,8 +904,7 @@ pub const Value = extern union { pub const Function = struct { base: Payload = Payload{ .tag = .function }, - /// Index into the `fns` array of the `ir.Module` - index: usize, + func: *Module.Fn, }; pub const ArraySentinel0_u8_Type = struct { @@ -855,16 +917,24 @@ pub const Value = extern union { elem_type: *Type, }; - pub const Ref = struct { - base: Payload = Payload{ .tag = .ref }, - cell: *MemoryCell, - }; - + /// Represents a pointer to another immutable value. 
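Note: `elemPtr` above folds composed element pointers: asking for element `i` of a value that is already an `elem_ptr` with index `j` yields a single `elem_ptr` with index `i + j` over the original array pointer, so chains never build up. A self-contained analogue of that bookkeeping (illustrative names, not the compiler's types):

```
const std = @import("std");

const FakeElemPtr = struct {
    array_id: u32,
    index: usize,
};

// Composing element pointers adds indices instead of nesting payloads.
fn fakeElemPtr(base: FakeElemPtr, index: usize) FakeElemPtr {
    return FakeElemPtr{ .array_id = base.array_id, .index = base.index + index };
}

test "elem_ptr indices fold instead of nesting" {
    const p = FakeElemPtr{ .array_id = 7, .index = 2 };
    const q = fakeElemPtr(p, 3);
    std.debug.assert(q.array_id == 7 and q.index == 5);
}
```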
pub const RefVal = struct { base: Payload = Payload{ .tag = .ref_val }, val: Value, }; + /// Represents a pointer to a decl, not the value of the decl. + pub const DeclRef = struct { + base: Payload = Payload{ .tag = .decl_ref }, + decl: *Module.Decl, + }; + + pub const ElemPtr = struct { + base: Payload = Payload{ .tag = .elem_ptr }, + array_ptr: Value, + index: usize, + }; + pub const Bytes = struct { base: Payload = Payload{ .tag = .bytes }, data: []const u8, @@ -890,29 +960,3 @@ pub const Value = extern union { limbs: [(@sizeOf(u64) / @sizeOf(std.math.big.Limb)) + 1]std.math.big.Limb, }; }; - -/// This is the heart of resource management of the Zig compiler. The Zig compiler uses -/// stop-the-world mark-and-sweep garbage collection during compilation to manage the resources -/// associated with evaluating compile-time code and semantic analysis. Each `MemoryCell` represents -/// a root. -pub const MemoryCell = struct { - parent: Parent, - contents: Value, - - pub const Parent = union(enum) { - none, - struct_field: struct { - struct_base: *MemoryCell, - field_index: usize, - }, - array_elem: struct { - array_base: *MemoryCell, - elem_index: usize, - }, - union_field: *MemoryCell, - err_union_code: *MemoryCell, - err_union_payload: *MemoryCell, - optional_payload: *MemoryCell, - optional_flag: *MemoryCell, - }; -}; diff --git a/src-self-hosted/visib.zig b/src-self-hosted/visib.zig deleted file mode 100644 index 3704600cca..0000000000 --- a/src-self-hosted/visib.zig +++ /dev/null @@ -1,4 +0,0 @@ -pub const Visib = enum { - Private, - Pub, -}; diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/zir.zig similarity index 77% rename from src-self-hosted/ir/text.zig rename to src-self-hosted/zir.zig index 1efcd6f599..b3673b58ad 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/zir.zig @@ -6,9 +6,11 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const BigIntConst = std.math.big.int.Const; const BigIntMutable = std.math.big.int.Mutable; -const Type = @import("../type.zig").Type; -const Value = @import("../value.zig").Value; -const ir = @import("../ir.zig"); +const Type = @import("type.zig").Type; +const Value = @import("value.zig").Value; +const TypedValue = @import("TypedValue.zig"); +const ir = @import("ir.zig"); +const IrModule = @import("Module.zig"); /// These are instructions that correspond to the ZIR text format. See `ir.Inst` for /// in-memory, analyzed instructions with types and values. @@ -16,10 +18,18 @@ pub const Inst = struct { tag: Tag, /// Byte offset into the source. src: usize, + name: []const u8, + + /// Slice into the source of the part after the = and before the next instruction. + contents: []const u8 = &[0]u8{}, /// These names are used directly as the instruction names in the text format. pub const Tag = enum { breakpoint, + call, + /// Represents a reference to a global decl by name. + /// The syntax `@foo` is equivalent to `declref("foo")`. 
+ declref, str, int, ptrtoint, @@ -32,6 +42,7 @@ pub const Inst = struct { @"fn", @"export", primitive, + ref, fntype, intcast, bitcast, @@ -46,6 +57,8 @@ pub const Inst = struct { pub fn TagToType(tag: Tag) type { return switch (tag) { .breakpoint => Breakpoint, + .call => Call, + .declref => DeclRef, .str => Str, .int => Int, .ptrtoint => PtrToInt, @@ -58,6 +71,7 @@ pub const Inst = struct { .@"fn" => Fn, .@"export" => Export, .primitive => Primitive, + .ref => Ref, .fntype => FnType, .intcast => IntCast, .bitcast => BitCast, @@ -85,6 +99,29 @@ pub const Inst = struct { kw_args: struct {}, }; + pub const Call = struct { + pub const base_tag = Tag.call; + base: Inst, + + positionals: struct { + func: *Inst, + args: []*Inst, + }, + kw_args: struct { + modifier: std.builtin.CallOptions.Modifier = .auto, + }, + }; + + pub const DeclRef = struct { + pub const base_tag = Tag.declref; + base: Inst, + + positionals: struct { + name: *Inst, + }, + kw_args: struct {}, + }; + pub const Str = struct { pub const base_tag = Tag.str; base: Inst, @@ -202,6 +239,16 @@ pub const Inst = struct { kw_args: struct {}, }; + pub const Ref = struct { + pub const base_tag = Tag.ref; + base: Inst, + + positionals: struct { + operand: *Inst, + }, + kw_args: struct {}, + }; + pub const Primitive = struct { pub const base_tag = Tag.primitive; base: Inst, @@ -212,55 +259,55 @@ pub const Inst = struct { kw_args: struct {}, pub const BuiltinType = enum { - @"isize", - @"usize", - @"c_short", - @"c_ushort", - @"c_int", - @"c_uint", - @"c_long", - @"c_ulong", - @"c_longlong", - @"c_ulonglong", - @"c_longdouble", - @"c_void", - @"f16", - @"f32", - @"f64", - @"f128", - @"bool", - @"void", - @"noreturn", - @"type", - @"anyerror", - @"comptime_int", - @"comptime_float", + isize, + usize, + c_short, + c_ushort, + c_int, + c_uint, + c_long, + c_ulong, + c_longlong, + c_ulonglong, + c_longdouble, + c_void, + f16, + f32, + f64, + f128, + bool, + void, + noreturn, + type, + anyerror, + comptime_int, + comptime_float, pub fn toType(self: BuiltinType) Type { return switch (self) { - .@"isize" => Type.initTag(.@"isize"), - .@"usize" => Type.initTag(.@"usize"), - .@"c_short" => Type.initTag(.@"c_short"), - .@"c_ushort" => Type.initTag(.@"c_ushort"), - .@"c_int" => Type.initTag(.@"c_int"), - .@"c_uint" => Type.initTag(.@"c_uint"), - .@"c_long" => Type.initTag(.@"c_long"), - .@"c_ulong" => Type.initTag(.@"c_ulong"), - .@"c_longlong" => Type.initTag(.@"c_longlong"), - .@"c_ulonglong" => Type.initTag(.@"c_ulonglong"), - .@"c_longdouble" => Type.initTag(.@"c_longdouble"), - .@"c_void" => Type.initTag(.@"c_void"), - .@"f16" => Type.initTag(.@"f16"), - .@"f32" => Type.initTag(.@"f32"), - .@"f64" => Type.initTag(.@"f64"), - .@"f128" => Type.initTag(.@"f128"), - .@"bool" => Type.initTag(.@"bool"), - .@"void" => Type.initTag(.@"void"), - .@"noreturn" => Type.initTag(.@"noreturn"), - .@"type" => Type.initTag(.@"type"), - .@"anyerror" => Type.initTag(.@"anyerror"), - .@"comptime_int" => Type.initTag(.@"comptime_int"), - .@"comptime_float" => Type.initTag(.@"comptime_float"), + .isize => Type.initTag(.isize), + .usize => Type.initTag(.usize), + .c_short => Type.initTag(.c_short), + .c_ushort => Type.initTag(.c_ushort), + .c_int => Type.initTag(.c_int), + .c_uint => Type.initTag(.c_uint), + .c_long => Type.initTag(.c_long), + .c_ulong => Type.initTag(.c_ulong), + .c_longlong => Type.initTag(.c_longlong), + .c_ulonglong => Type.initTag(.c_ulonglong), + .c_longdouble => Type.initTag(.c_longdouble), + .c_void => Type.initTag(.c_void), + .f16 => 
Type.initTag(.f16), + .f32 => Type.initTag(.f32), + .f64 => Type.initTag(.f64), + .f128 => Type.initTag(.f128), + .bool => Type.initTag(.bool), + .void => Type.initTag(.void), + .noreturn => Type.initTag(.noreturn), + .type => Type.initTag(.type), + .anyerror => Type.initTag(.anyerror), + .comptime_int => Type.initTag(.comptime_int), + .comptime_float => Type.initTag(.comptime_float), }; } }; @@ -375,8 +422,8 @@ pub const ErrorMsg = struct { pub const Module = struct { decls: []*Inst, - errors: []ErrorMsg, arena: std.heap.ArenaAllocator, + error_msg: ?ErrorMsg = null, pub const Body = struct { instructions: []*Inst, @@ -384,7 +431,6 @@ pub const Module = struct { pub fn deinit(self: *Module, allocator: *Allocator) void { allocator.free(self.decls); - allocator.free(self.errors); self.arena.deinit(); self.* = undefined; } @@ -431,6 +477,8 @@ pub const Module = struct { // TODO I tried implementing this with an inline for loop and hit a compiler bug switch (decl.tag) { .breakpoint => return self.writeInstToStreamGeneric(stream, .breakpoint, decl, inst_table), + .call => return self.writeInstToStreamGeneric(stream, .call, decl, inst_table), + .declref => return self.writeInstToStreamGeneric(stream, .declref, decl, inst_table), .str => return self.writeInstToStreamGeneric(stream, .str, decl, inst_table), .int => return self.writeInstToStreamGeneric(stream, .int, decl, inst_table), .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table), @@ -442,6 +490,7 @@ pub const Module = struct { .@"return" => return self.writeInstToStreamGeneric(stream, .@"return", decl, inst_table), .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table), .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table), + .ref => return self.writeInstToStreamGeneric(stream, .ref, decl, inst_table), .primitive => return self.writeInstToStreamGeneric(stream, .primitive, decl, inst_table), .fntype => return self.writeInstToStreamGeneric(stream, .fntype, decl, inst_table), .intcast => return self.writeInstToStreamGeneric(stream, .intcast, decl, inst_table), @@ -543,22 +592,23 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module .arena = std.heap.ArenaAllocator.init(allocator), .i = 0, .source = source, - .decls = std.ArrayList(*Inst).init(allocator), - .errors = std.ArrayList(ErrorMsg).init(allocator), .global_name_map = &global_name_map, + .decls = .{}, + .unnamed_index = 0, }; errdefer parser.arena.deinit(); parser.parseRoot() catch |err| switch (err) { error.ParseFailure => { - assert(parser.errors.items.len != 0); + assert(parser.error_msg != null); }, else => |e| return e, }; + return Module{ - .decls = parser.decls.toOwnedSlice(), - .errors = parser.errors.toOwnedSlice(), + .decls = parser.decls.toOwnedSlice(allocator), .arena = parser.arena, + .error_msg = parser.error_msg, }; } @@ -567,9 +617,10 @@ const Parser = struct { arena: std.heap.ArenaAllocator, i: usize, source: [:0]const u8, - errors: std.ArrayList(ErrorMsg), - decls: std.ArrayList(*Inst), + decls: std.ArrayListUnmanaged(*Inst), global_name_map: *std.StringHashMap(usize), + error_msg: ?ErrorMsg = null, + unnamed_index: usize, const Body = struct { instructions: std.ArrayList(*Inst), @@ -595,7 +646,7 @@ const Parser = struct { skipSpace(self); try requireEatBytes(self, "="); skipSpace(self); - const inst = try parseInstruction(self, &body_context); + const inst = try parseInstruction(self, &body_context, ident); const ident_index = 
body_context.instructions.items.len; if (try body_context.name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); @@ -681,12 +732,12 @@ const Parser = struct { skipSpace(self); try requireEatBytes(self, "="); skipSpace(self); - const inst = try parseInstruction(self, null); + const inst = try parseInstruction(self, null, ident); const ident_index = self.decls.items.len; if (try self.global_name_map.put(ident, ident_index)) |_| { return self.fail("redefinition of identifier '{}'", .{ident}); } - try self.decls.append(inst); + try self.decls.append(self.allocator, inst); }, ' ', '\n' => self.i += 1, 0 => break, @@ -743,20 +794,20 @@ const Parser = struct { fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError { @setCold(true); - const msg = try std.fmt.allocPrint(&self.arena.allocator, format, args); - (try self.errors.addOne()).* = .{ + self.error_msg = ErrorMsg{ .byte_offset = self.i, - .msg = msg, + .msg = try std.fmt.allocPrint(&self.arena.allocator, format, args), }; return error.ParseFailure; } - fn parseInstruction(self: *Parser, body_ctx: ?*Body) InnerError!*Inst { + fn parseInstruction(self: *Parser, body_ctx: ?*Body, name: []const u8) InnerError!*Inst { + const contents_start = self.i; const fn_name = try skipToAndOver(self, '('); inline for (@typeInfo(Inst.Tag).Enum.fields) |field| { if (mem.eql(u8, field.name, fn_name)) { const tag = @field(Inst.Tag, field.name); - return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx); + return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name, contents_start); } } return self.fail("unknown instruction '{}'", .{fn_name}); @@ -767,9 +818,12 @@ const Parser = struct { comptime fn_name: []const u8, comptime InstType: type, body_ctx: ?*Body, - ) !*Inst { + inst_name: []const u8, + contents_start: usize, + ) InnerError!*Inst { const inst_specific = try self.arena.allocator.create(InstType); inst_specific.base = .{ + .name = inst_name, .src = self.i, .tag = InstType.base_tag, }; @@ -819,6 +873,8 @@ const Parser = struct { } try requireEatBytes(self, ")"); + inst_specific.base.contents = self.source[contents_start..self.i]; + return &inst_specific.base; } @@ -893,8 +949,33 @@ const Parser = struct { const ident = self.source[name_start..self.i]; const kv = map.get(ident) orelse { const bad_name = self.source[name_start - 1 .. 
self.i]; - self.i = name_start - 1; - return self.fail("unrecognized identifier: {}", .{bad_name}); + const src = name_start - 1; + if (local_ref) { + self.i = src; + return self.fail("unrecognized identifier: {}", .{bad_name}); + } else { + const name = try self.arena.allocator.create(Inst.Str); + name.* = .{ + .base = .{ + .name = try self.generateName(), + .src = src, + .tag = Inst.Str.base_tag, + }, + .positionals = .{ .bytes = ident }, + .kw_args = .{}, + }; + const declref = try self.arena.allocator.create(Inst.DeclRef); + declref.* = .{ + .base = .{ + .name = try self.generateName(), + .src = src, + .tag = Inst.DeclRef.base_tag, + }, + .positionals = .{ .name = &name.base }, + .kw_args = .{}, + }; + return &declref.base; + } }; if (local_ref) { return body_ctx.?.instructions.items[kv.value]; @@ -902,50 +983,64 @@ const Parser = struct { return self.decls.items[kv.value]; } } + + fn generateName(self: *Parser) ![]u8 { + const result = try std.fmt.allocPrint(&self.arena.allocator, "unnamed${}", .{self.unnamed_index}); + self.unnamed_index += 1; + return result; + } }; -pub fn emit_zir(allocator: *Allocator, old_module: ir.Module) !Module { +pub fn emit(allocator: *Allocator, old_module: IrModule) !Module { var ctx: EmitZIR = .{ .allocator = allocator, - .decls = std.ArrayList(*Inst).init(allocator), + .decls = .{}, .decl_table = std.AutoHashMap(*ir.Inst, *Inst).init(allocator), .arena = std.heap.ArenaAllocator.init(allocator), .old_module = &old_module, }; - defer ctx.decls.deinit(); + defer ctx.decls.deinit(allocator); defer ctx.decl_table.deinit(); errdefer ctx.arena.deinit(); try ctx.emit(); return Module{ - .decls = ctx.decls.toOwnedSlice(), + .decls = ctx.decls.toOwnedSlice(allocator), .arena = ctx.arena, - .errors = &[0]ErrorMsg{}, }; } const EmitZIR = struct { allocator: *Allocator, arena: std.heap.ArenaAllocator, - old_module: *const ir.Module, - decls: std.ArrayList(*Inst), + old_module: *const IrModule, + decls: std.ArrayListUnmanaged(*Inst), decl_table: std.AutoHashMap(*ir.Inst, *Inst), fn emit(self: *EmitZIR) !void { - for (self.old_module.exports) |module_export| { - const export_value = try self.emitTypedValue(module_export.src, module_export.typed_value); - const symbol_name = try self.emitStringLiteral(module_export.src, module_export.name); - const export_inst = try self.arena.allocator.create(Inst.Export); - export_inst.* = .{ - .base = .{ .src = module_export.src, .tag = Inst.Export.base_tag }, - .positionals = .{ - .symbol_name = symbol_name, - .value = export_value, - }, - .kw_args = .{}, - }; - try self.decls.append(&export_inst.base); + var it = self.old_module.decl_exports.iterator(); + while (it.next()) |kv| { + const decl = kv.key; + const exports = kv.value; + const export_value = try self.emitTypedValue(decl.src, decl.typed_value.most_recent.typed_value); + for (exports) |module_export| { + const symbol_name = try self.emitStringLiteral(module_export.src, module_export.options.name); + const export_inst = try self.arena.allocator.create(Inst.Export); + export_inst.* = .{ + .base = .{ + .name = try self.autoName(), + .src = module_export.src, + .tag = Inst.Export.base_tag, + }, + .positionals = .{ + .symbol_name = symbol_name, + .value = export_value, + }, + .kw_args = .{}, + }; + try self.decls.append(self.allocator, &export_inst.base); + } } } @@ -966,17 +1061,22 @@ const EmitZIR = struct { const big_int_space = try self.arena.allocator.create(Value.BigIntSpace); const int_inst = try self.arena.allocator.create(Inst.Int); int_inst.* = .{ - .base = .{ .src 
= src, .tag = Inst.Int.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Int.base_tag, + }, .positionals = .{ .int = val.toBigInt(big_int_space), }, .kw_args = .{}, }; - try self.decls.append(&int_inst.base); + try self.decls.append(self.allocator, &int_inst.base); return &int_inst.base; } - fn emitTypedValue(self: *EmitZIR, src: usize, typed_value: ir.TypedValue) Allocator.Error!*Inst { + fn emitTypedValue(self: *EmitZIR, src: usize, typed_value: TypedValue) Allocator.Error!*Inst { + const allocator = &self.arena.allocator; switch (typed_value.ty.zigTypeTag()) { .Pointer => { const ptr_elem_type = typed_value.ty.elemType(); @@ -988,7 +1088,10 @@ const EmitZIR = struct { // ptr_elem_type.hasSentinel(Value.initTag(.zero))) //{ //} - const bytes = try typed_value.val.toAllocatedBytes(&self.arena.allocator); + const bytes = typed_value.val.toAllocatedBytes(allocator) catch |err| switch (err) { + error.AnalysisFail => unreachable, + else => |e| return e, + }; return self.emitStringLiteral(src, bytes); }, else => |t| std.debug.panic("TODO implement emitTypedValue for pointer to {}", .{@tagName(t)}), @@ -998,14 +1101,18 @@ const EmitZIR = struct { .Int => { const as_inst = try self.arena.allocator.create(Inst.As); as_inst.* = .{ - .base = .{ .src = src, .tag = Inst.As.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.As.base_tag, + }, .positionals = .{ .dest_type = try self.emitType(src, typed_value.ty), .value = try self.emitComptimeIntVal(src, typed_value.val), }, .kw_args = .{}, }; - try self.decls.append(&as_inst.base); + try self.decls.append(self.allocator, &as_inst.base); return &as_inst.base; }, @@ -1014,8 +1121,7 @@ const EmitZIR = struct { return self.emitType(src, ty); }, .Fn => { - const index = typed_value.val.cast(Value.Payload.Function).?.index; - const module_fn = self.old_module.fns[index]; + const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; var inst_table = std.AutoHashMap(*ir.Inst, *Inst).init(self.allocator); defer inst_table.deinit(); @@ -1023,7 +1129,7 @@ const EmitZIR = struct { var instructions = std.ArrayList(*Inst).init(self.allocator); defer instructions.deinit(); - try self.emitBody(module_fn.body, &inst_table, &instructions); + try self.emitBody(module_fn.analysis.success, &inst_table, &instructions); const fn_type = try self.emitType(src, module_fn.fn_type); @@ -1032,14 +1138,18 @@ const EmitZIR = struct { const fn_inst = try self.arena.allocator.create(Inst.Fn); fn_inst.* = .{ - .base = .{ .src = src, .tag = Inst.Fn.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Fn.base_tag, + }, .positionals = .{ .fn_type = fn_type, .body = .{ .instructions = arena_instrs }, }, .kw_args = .{}, }; - try self.decls.append(&fn_inst.base); + try self.decls.append(self.allocator, &fn_inst.base); return &fn_inst.base; }, else => |t| std.debug.panic("TODO implement emitTypedValue for {}", .{@tagName(t)}), @@ -1049,7 +1159,11 @@ const EmitZIR = struct { fn emitTrivial(self: *EmitZIR, src: usize, comptime T: type) Allocator.Error!*Inst { const new_inst = try self.arena.allocator.create(T); new_inst.* = .{ - .base = .{ .src = src, .tag = T.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = T.base_tag, + }, .positionals = .{}, .kw_args = .{}, }; @@ -1058,13 +1172,35 @@ const EmitZIR = struct { fn emitBody( self: *EmitZIR, - body: ir.Module.Body, + body: IrModule.Body, inst_table: *std.AutoHashMap(*ir.Inst, *Inst), instructions: 
*std.ArrayList(*Inst), ) Allocator.Error!void { for (body.instructions) |inst| { const new_inst = switch (inst.tag) { .breakpoint => try self.emitTrivial(inst.src, Inst.Breakpoint), + .call => blk: { + const old_inst = inst.cast(ir.Inst.Call).?; + const new_inst = try self.arena.allocator.create(Inst.Call); + + const args = try self.arena.allocator.alloc(*Inst, old_inst.args.args.len); + for (args) |*elem, i| { + elem.* = try self.resolveInst(inst_table, old_inst.args.args[i]); + } + new_inst.* = .{ + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.Call.base_tag, + }, + .positionals = .{ + .func = try self.resolveInst(inst_table, old_inst.args.func), + .args = args, + }, + .kw_args = .{}, + }; + break :blk &new_inst.base; + }, .unreach => try self.emitTrivial(inst.src, Inst.Unreachable), .ret => try self.emitTrivial(inst.src, Inst.Return), .constant => unreachable, // excluded from function bodies @@ -1088,7 +1224,11 @@ const EmitZIR = struct { } new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.Asm.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.Asm.base_tag, + }, .positionals = .{ .asm_source = try self.emitStringLiteral(inst.src, old_inst.args.asm_source), .return_type = try self.emitType(inst.src, inst.ty), @@ -1110,7 +1250,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.PtrToInt).?; const new_inst = try self.arena.allocator.create(Inst.PtrToInt); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.PtrToInt.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.PtrToInt.base_tag, + }, .positionals = .{ .ptr = try self.resolveInst(inst_table, old_inst.args.ptr), }, @@ -1122,7 +1266,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.BitCast).?; const new_inst = try self.arena.allocator.create(Inst.BitCast); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.BitCast.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.BitCast.base_tag, + }, .positionals = .{ .dest_type = try self.emitType(inst.src, inst.ty), .operand = try self.resolveInst(inst_table, old_inst.args.operand), @@ -1135,7 +1283,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.Cmp).?; const new_inst = try self.arena.allocator.create(Inst.Cmp); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.Cmp.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.Cmp.base_tag, + }, .positionals = .{ .lhs = try self.resolveInst(inst_table, old_inst.args.lhs), .rhs = try self.resolveInst(inst_table, old_inst.args.rhs), @@ -1159,7 +1311,11 @@ const EmitZIR = struct { const new_inst = try self.arena.allocator.create(Inst.CondBr); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.CondBr.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.CondBr.base_tag, + }, .positionals = .{ .condition = try self.resolveInst(inst_table, old_inst.args.condition), .true_body = .{ .instructions = true_body.toOwnedSlice() }, @@ -1173,7 +1329,11 @@ const EmitZIR = struct { const old_inst = inst.cast(ir.Inst.IsNull).?; const new_inst = try self.arena.allocator.create(Inst.IsNull); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.IsNull.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.IsNull.base_tag, + }, .positionals = .{ .operand = try self.resolveInst(inst_table, old_inst.args.operand), }, @@ -1185,7 +1345,11 @@ const 
EmitZIR = struct { const old_inst = inst.cast(ir.Inst.IsNonNull).?; const new_inst = try self.arena.allocator.create(Inst.IsNonNull); new_inst.* = .{ - .base = .{ .src = inst.src, .tag = Inst.IsNonNull.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = inst.src, + .tag = Inst.IsNonNull.base_tag, + }, .positionals = .{ .operand = try self.resolveInst(inst_table, old_inst.args.operand), }, @@ -1237,7 +1401,11 @@ const EmitZIR = struct { const fntype_inst = try self.arena.allocator.create(Inst.FnType); fntype_inst.* = .{ - .base = .{ .src = src, .tag = Inst.FnType.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.FnType.base_tag, + }, .positionals = .{ .param_types = emitted_params, .return_type = try self.emitType(src, ty.fnReturnType()), @@ -1246,7 +1414,7 @@ const EmitZIR = struct { .cc = ty.fnCallingConvention(), }, }; - try self.decls.append(&fntype_inst.base); + try self.decls.append(self.allocator, &fntype_inst.base); return &fntype_inst.base; }, else => std.debug.panic("TODO implement emitType for {}", .{ty}), @@ -1254,29 +1422,56 @@ const EmitZIR = struct { } } + fn autoName(self: *EmitZIR) ![]u8 { + return std.fmt.allocPrint(&self.arena.allocator, "{}", .{self.decls.items.len}); + } + fn emitPrimitiveType(self: *EmitZIR, src: usize, tag: Inst.Primitive.BuiltinType) !*Inst { const primitive_inst = try self.arena.allocator.create(Inst.Primitive); primitive_inst.* = .{ - .base = .{ .src = src, .tag = Inst.Primitive.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Primitive.base_tag, + }, .positionals = .{ .tag = tag, }, .kw_args = .{}, }; - try self.decls.append(&primitive_inst.base); + try self.decls.append(self.allocator, &primitive_inst.base); return &primitive_inst.base; } fn emitStringLiteral(self: *EmitZIR, src: usize, str: []const u8) !*Inst { const str_inst = try self.arena.allocator.create(Inst.Str); str_inst.* = .{ - .base = .{ .src = src, .tag = Inst.Str.base_tag }, + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Str.base_tag, + }, .positionals = .{ .bytes = str, }, .kw_args = .{}, }; - try self.decls.append(&str_inst.base); - return &str_inst.base; + try self.decls.append(self.allocator, &str_inst.base); + + const ref_inst = try self.arena.allocator.create(Inst.Ref); + ref_inst.* = .{ + .base = .{ + .name = try self.autoName(), + .src = src, + .tag = Inst.Ref.base_tag, + }, + .positionals = .{ + .operand = &str_inst.base, + }, + .kw_args = .{}, + }; + try self.decls.append(self.allocator, &ref_inst.base); + + return &ref_inst.base; } }; diff --git a/src/codegen.cpp b/src/codegen.cpp index bf0e004dda..bc0e731bea 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1794,6 +1794,16 @@ static LLVMValueRef ir_llvm_value(CodeGen *g, IrInstGen *instruction) { } void codegen_report_errors_and_exit(CodeGen *g) { + // Clear progress indicator before printing errors + if (g->sub_progress_node != nullptr) { + stage2_progress_end(g->sub_progress_node); + g->sub_progress_node = nullptr; + } + if (g->main_progress_node != nullptr) { + stage2_progress_end(g->main_progress_node); + g->main_progress_node = nullptr; + } + assert(g->errors.length != 0); for (size_t i = 0; i < g->errors.length; i += 1) { ErrorMsg *err = g->errors.at(i); diff --git a/test/stage2/zir.zig b/test/stage2/zir.zig index 9a65e9ab96..afee3e7895 100644 --- a/test/stage2/zir.zig +++ b/test/stage2/zir.zig @@ -1,7 +1,15 @@ +const std = @import("std"); const TestContext = 
@import("../../src-self-hosted/test.zig").TestContext; +// self-hosted does not yet support PE executable files / COFF object files +// or mach-o files. So we do the ZIR transform test cases cross compiling for +// x86_64-linux. +const linux_x64 = std.zig.CrossTarget{ + .cpu_arch = .x86_64, + .os_tag = .linux, +}; pub fn addCases(ctx: *TestContext) void { - ctx.addZIRTransform("elemptr, add, cmp, condbr, return, breakpoint", + ctx.addZIRTransform("elemptr, add, cmp, condbr, return, breakpoint", linux_x64, \\@void = primitive(void) \\@usize = primitive(usize) \\@fnty = fntype([], @void, cc=C) @@ -12,10 +20,11 @@ pub fn addCases(ctx: *TestContext) void { \\ \\@entry = fn(@fnty, { \\ %a = str("\x32\x08\x01\x0a") - \\ %eptr0 = elemptr(%a, @0) - \\ %eptr1 = elemptr(%a, @1) - \\ %eptr2 = elemptr(%a, @2) - \\ %eptr3 = elemptr(%a, @3) + \\ %aref = ref(%a) + \\ %eptr0 = elemptr(%aref, @0) + \\ %eptr1 = elemptr(%aref, @1) + \\ %eptr2 = elemptr(%aref, @2) + \\ %eptr3 = elemptr(%aref, @3) \\ %v0 = deref(%eptr0) \\ %v1 = deref(%eptr1) \\ %v2 = deref(%eptr2) @@ -34,7 +43,8 @@ pub fn addCases(ctx: *TestContext) void { \\}) \\ \\@9 = str("entry") - \\@10 = export(@9, @entry) + \\@10 = ref(@9) + \\@11 = export(@10, @entry) , \\@0 = primitive(void) \\@1 = fntype([], @0, cc=C) @@ -42,66 +52,161 @@ pub fn addCases(ctx: *TestContext) void { \\ %0 = return() \\}) \\@3 = str("entry") - \\@4 = export(@3, @2) + \\@4 = ref(@3) + \\@5 = export(@4, @2) \\ ); - if (@import("std").Target.current.os.tag != .linux or - @import("std").Target.current.cpu.arch != .x86_64) + if (std.Target.current.os.tag != .linux or + std.Target.current.cpu.arch != .x86_64) { // TODO implement self-hosted PE (.exe file) linking // TODO implement more ZIR so we don't depend on x86_64-linux return; } - ctx.addZIRCompareOutput("hello world ZIR", - \\@0 = str("Hello, world!\n") - \\@1 = primitive(noreturn) - \\@2 = primitive(usize) - \\@3 = fntype([], @1, cc=Naked) - \\@4 = int(0) - \\@5 = int(1) - \\@6 = int(231) - \\@7 = str("len") - \\ - \\@8 = fn(@3, { - \\ %0 = as(@2, @5) ; SYS_write - \\ %1 = as(@2, @5) ; STDOUT_FILENO - \\ %2 = ptrtoint(@0) ; msg ptr - \\ %3 = fieldptr(@0, @7) ; msg len ptr - \\ %4 = deref(%3) ; msg len - \\ %sysoutreg = str("={rax}") - \\ %rax = str("{rax}") - \\ %rdi = str("{rdi}") - \\ %rsi = str("{rsi}") - \\ %rdx = str("{rdx}") - \\ %rcx = str("rcx") - \\ %r11 = str("r11") - \\ %memory = str("memory") - \\ %syscall = str("syscall") - \\ %5 = asm(%syscall, @2, - \\ volatile=1, - \\ output=%sysoutreg, - \\ inputs=[%rax, %rdi, %rsi, %rdx], - \\ clobbers=[%rcx, %r11, %memory], - \\ args=[%0, %1, %2, %4]) - \\ - \\ %6 = as(@2, @6) ;SYS_exit_group - \\ %7 = as(@2, @4) ;exit code - \\ %8 = asm(%syscall, @2, - \\ volatile=1, - \\ output=%sysoutreg, - \\ inputs=[%rax, %rdi], - \\ clobbers=[%rcx, %r11, %memory], - \\ args=[%6, %7]) - \\ - \\ %9 = unreachable() - \\}) - \\ - \\@9 = str("_start") - \\@10 = export(@9, @8) - , - \\Hello, world! 
 
-    ctx.addZIRCompareOutput("hello world ZIR",
-        \\@0 = str("Hello, world!\n")
-        \\@1 = primitive(noreturn)
-        \\@2 = primitive(usize)
-        \\@3 = fntype([], @1, cc=Naked)
-        \\@4 = int(0)
-        \\@5 = int(1)
-        \\@6 = int(231)
-        \\@7 = str("len")
-        \\
-        \\@8 = fn(@3, {
-        \\  %0 = as(@2, @5) ; SYS_write
-        \\  %1 = as(@2, @5) ; STDOUT_FILENO
-        \\  %2 = ptrtoint(@0) ; msg ptr
-        \\  %3 = fieldptr(@0, @7) ; msg len ptr
-        \\  %4 = deref(%3) ; msg len
-        \\  %sysoutreg = str("={rax}")
-        \\  %rax = str("{rax}")
-        \\  %rdi = str("{rdi}")
-        \\  %rsi = str("{rsi}")
-        \\  %rdx = str("{rdx}")
-        \\  %rcx = str("rcx")
-        \\  %r11 = str("r11")
-        \\  %memory = str("memory")
-        \\  %syscall = str("syscall")
-        \\  %5 = asm(%syscall, @2,
-        \\    volatile=1,
-        \\    output=%sysoutreg,
-        \\    inputs=[%rax, %rdi, %rsi, %rdx],
-        \\    clobbers=[%rcx, %r11, %memory],
-        \\    args=[%0, %1, %2, %4])
-        \\
-        \\  %6 = as(@2, @6) ;SYS_exit_group
-        \\  %7 = as(@2, @4) ;exit code
-        \\  %8 = asm(%syscall, @2,
-        \\    volatile=1,
-        \\    output=%sysoutreg,
-        \\    inputs=[%rax, %rdi],
-        \\    clobbers=[%rcx, %r11, %memory],
-        \\    args=[%6, %7])
-        \\
-        \\  %9 = unreachable()
-        \\})
-        \\
-        \\@9 = str("_start")
-        \\@10 = export(@9, @8)
-    ,
-        \\Hello, world!
-        \\
+    ctx.addZIRCompareOutput(
+        "hello world ZIR, update msg",
+        &[_][]const u8{
+            \\@noreturn = primitive(noreturn)
+            \\@void = primitive(void)
+            \\@usize = primitive(usize)
+            \\@0 = int(0)
+            \\@1 = int(1)
+            \\@2 = int(2)
+            \\@3 = int(3)
+            \\
+            \\@syscall_array = str("syscall")
+            \\@sysoutreg_array = str("={rax}")
+            \\@rax_array = str("{rax}")
+            \\@rdi_array = str("{rdi}")
+            \\@rcx_array = str("rcx")
+            \\@r11_array = str("r11")
+            \\@rdx_array = str("{rdx}")
+            \\@rsi_array = str("{rsi}")
+            \\@memory_array = str("memory")
+            \\@len_array = str("len")
+            \\
+            \\@msg = str("Hello, world!\n")
+            \\
+            \\@start_fnty = fntype([], @noreturn, cc=Naked)
+            \\@start = fn(@start_fnty, {
+            \\  %SYS_exit_group = int(231)
+            \\  %exit_code = as(@usize, @0)
+            \\
+            \\  %syscall = ref(@syscall_array)
+            \\  %sysoutreg = ref(@sysoutreg_array)
+            \\  %rax = ref(@rax_array)
+            \\  %rdi = ref(@rdi_array)
+            \\  %rcx = ref(@rcx_array)
+            \\  %rdx = ref(@rdx_array)
+            \\  %rsi = ref(@rsi_array)
+            \\  %r11 = ref(@r11_array)
+            \\  %memory = ref(@memory_array)
+            \\
+            \\  %SYS_write = as(@usize, @1)
+            \\  %STDOUT_FILENO = as(@usize, @1)
+            \\
+            \\  %msg_ptr = ref(@msg)
+            \\  %msg_addr = ptrtoint(%msg_ptr)
+            \\
+            \\  %len_name = ref(@len_array)
+            \\  %msg_len_ptr = fieldptr(%msg_ptr, %len_name)
+            \\  %msg_len = deref(%msg_len_ptr)
+            \\  %rc_write = asm(%syscall, @usize,
+            \\    volatile=1,
+            \\    output=%sysoutreg,
+            \\    inputs=[%rax, %rdi, %rsi, %rdx],
+            \\    clobbers=[%rcx, %r11, %memory],
+            \\    args=[%SYS_write, %STDOUT_FILENO, %msg_addr, %msg_len])
+            \\
+            \\  %rc_exit = asm(%syscall, @usize,
+            \\    volatile=1,
+            \\    output=%sysoutreg,
+            \\    inputs=[%rax, %rdi],
+            \\    clobbers=[%rcx, %r11, %memory],
+            \\    args=[%SYS_exit_group, %exit_code])
+            \\
+            \\  %99 = unreachable()
+            \\});
+            \\
+            \\@9 = str("_start")
+            \\@10 = ref(@9)
+            \\@11 = export(@10, @start)
+            ,
+            \\@noreturn = primitive(noreturn)
+            \\@void = primitive(void)
+            \\@usize = primitive(usize)
+            \\@0 = int(0)
+            \\@1 = int(1)
+            \\@2 = int(2)
+            \\@3 = int(3)
+            \\
+            \\@syscall_array = str("syscall")
+            \\@sysoutreg_array = str("={rax}")
+            \\@rax_array = str("{rax}")
+            \\@rdi_array = str("{rdi}")
+            \\@rcx_array = str("rcx")
+            \\@r11_array = str("r11")
+            \\@rdx_array = str("{rdx}")
+            \\@rsi_array = str("{rsi}")
+            \\@memory_array = str("memory")
+            \\@len_array = str("len")
+            \\
+            \\@msg = str("Hello, world!\n")
+            \\@msg2 = str("HELL WORLD\n")
+            \\
+            \\@start_fnty = fntype([], @noreturn, cc=Naked)
+            \\@start = fn(@start_fnty, {
+            \\  %SYS_exit_group = int(231)
+            \\  %exit_code = as(@usize, @0)
+            \\
+            \\  %syscall = ref(@syscall_array)
+            \\  %sysoutreg = ref(@sysoutreg_array)
+            \\  %rax = ref(@rax_array)
+            \\  %rdi = ref(@rdi_array)
+            \\  %rcx = ref(@rcx_array)
+            \\  %rdx = ref(@rdx_array)
+            \\  %rsi = ref(@rsi_array)
+            \\  %r11 = ref(@r11_array)
+            \\  %memory = ref(@memory_array)
+            \\
+            \\  %SYS_write = as(@usize, @1)
+            \\  %STDOUT_FILENO = as(@usize, @1)
+            \\
+            \\  %msg_ptr = ref(@msg2)
+            \\  %msg_addr = ptrtoint(%msg_ptr)
+            \\
+            \\  %len_name = ref(@len_array)
+            \\  %msg_len_ptr = fieldptr(%msg_ptr, %len_name)
+            \\  %msg_len = deref(%msg_len_ptr)
+            \\  %rc_write = asm(%syscall, @usize,
+            \\    volatile=1,
+            \\    output=%sysoutreg,
+            \\    inputs=[%rax, %rdi, %rsi, %rdx],
+            \\    clobbers=[%rcx, %r11, %memory],
+            \\    args=[%SYS_write, %STDOUT_FILENO, %msg_addr, %msg_len])
+            \\
+            \\  %rc_exit = asm(%syscall, @usize,
+            \\    volatile=1,
+            \\    output=%sysoutreg,
+            \\    inputs=[%rax, %rdi],
+            \\    clobbers=[%rcx, %r11, %memory],
+            \\    args=[%SYS_exit_group, %exit_code])
+            \\
+            \\  %99 = unreachable()
+            \\});
+            \\
+            \\@9 = str("_start")
+            \\@10 = ref(@9)
+            \\@11 = export(@10, @start)
+        },
+        &[_][]const u8{
+            \\Hello, world!
+            \\
+            ,
+            \\HELL WORLD
+            \\
+        },
     );
 }
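The reworked `addZIRCompareOutput` takes parallel slices of ZIR sources and expected stdout, so a single case can cover an update; here the second source differs only in pointing `%msg_ptr` at `@msg2`. A sketch of the driver loop this signature implies, where `compileAndRun` is a hypothetical stand-in for the real machinery in src-self-hosted/test.zig:

```
const std = @import("std");

// Hypothetical driver shape implied by the new test signature: compile and
// run each source version in order, then compare captured stdout against
// the matching expectation.
fn runCompareOutput(
    sources: []const []const u8,
    expected_stdout: []const []const u8,
    compileAndRun: fn ([]const u8) anyerror![]const u8, // hypothetical
) !void {
    std.debug.assert(sources.len == expected_stdout.len);
    for (sources) |src, i| {
        const actual = try compileAndRun(src);
        std.testing.expectEqualSlices(u8, expected_stdout[i], actual);
    }
}
```

Presumably the later versions are fed to the same compilation as updates rather than compiled from scratch, since that is what exercises the incremental-build path this branch is building toward.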