diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 585ba6c51a..f18b138440 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1,447 +1,137 @@ const std = @import("std"); -const Compilation = @import("compilation.zig").Compilation; -const llvm = @import("llvm.zig"); -const c = @import("c.zig"); -const ir = @import("ir.zig"); -const Value = @import("value.zig").Value; -const Type = @import("type.zig").Type; -const Scope = @import("scope.zig").Scope; -const util = @import("util.zig"); -const event = std.event; +const mem = std.mem; const assert = std.debug.assert; -const DW = std.dwarf; -const maxInt = std.math.maxInt; +const ir = @import("ir.zig"); +const Type = @import("type.zig").Type; +const Value = @import("value.zig").Value; -pub async fn renderToLlvm(comp: *Compilation, fn_val: *Value.Fn, code: *ir.Code) Compilation.BuildError!void { - fn_val.base.ref(); - defer fn_val.base.deref(comp); - defer code.destroy(comp.gpa()); +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, +}; - var output_path = try comp.createRandomOutputPath(comp.target.oFileExt()); - errdefer output_path.deinit(); +pub const Symbol = struct { + errors: []ErrorMsg, - const llvm_handle = try comp.zig_compiler.getAnyLlvmContext(); - defer llvm_handle.release(comp.zig_compiler); - - const context = llvm_handle.node.data; - - const module = llvm.ModuleCreateWithNameInContext(comp.name.span(), context) orelse return error.OutOfMemory; - defer llvm.DisposeModule(module); - - llvm.SetTarget(module, comp.llvm_triple.span()); - llvm.SetDataLayout(module, comp.target_layout_str); - - if (comp.target.getObjectFormat() == .coff) { - llvm.AddModuleCodeViewFlag(module); - } else { - llvm.AddModuleDebugInfoFlag(module); - } - - const builder = llvm.CreateBuilderInContext(context) orelse return error.OutOfMemory; - defer llvm.DisposeBuilder(builder); - - const dibuilder = llvm.CreateDIBuilder(module, true) orelse return error.OutOfMemory; - defer llvm.DisposeDIBuilder(dibuilder); - - // Don't use ZIG_VERSION_STRING here. LLVM misparses it when it includes - // the git revision. - const producer = try std.fmt.allocPrintZ(&code.arena.allocator, "zig {}.{}.{}", .{ - @as(u32, c.ZIG_VERSION_MAJOR), - @as(u32, c.ZIG_VERSION_MINOR), - @as(u32, c.ZIG_VERSION_PATCH), - }); - const flags = ""; - const runtime_version = 0; - const compile_unit_file = llvm.CreateFile( - dibuilder, - comp.name.span(), - comp.root_package.root_src_dir.span(), - ) orelse return error.OutOfMemory; - const is_optimized = comp.build_mode != .Debug; - const compile_unit = llvm.CreateCompileUnit( - dibuilder, - DW.LANG_C99, - compile_unit_file, - producer, - is_optimized, - flags, - runtime_version, - "", - 0, - !comp.strip, - ) orelse return error.OutOfMemory; - - var ofile = ObjectFile{ - .comp = comp, - .module = module, - .builder = builder, - .dibuilder = dibuilder, - .context = context, - .lock = event.Lock.init(), - .arena = &code.arena.allocator, - }; - - try renderToLlvmModule(&ofile, fn_val, code); - - // TODO module level assembly - //if (buf_len(&g->global_asm) != 0) { - // LLVMSetModuleInlineAsm(g->module, buf_ptr(&g->global_asm)); - //} - - llvm.DIBuilderFinalize(dibuilder); - - if (comp.verbose_llvm_ir) { - std.debug.warn("raw module:\n", .{}); - llvm.DumpModule(ofile.module); - } - - // verify the llvm module when safety is on - if (std.debug.runtime_safety) { - var error_ptr: ?[*:0]u8 = null; - _ = llvm.VerifyModule(ofile.module, llvm.AbortProcessAction, &error_ptr); - } - - const is_small = comp.build_mode == .ReleaseSmall; - const is_debug = comp.build_mode == .Debug; - - var err_msg: [*:0]u8 = undefined; - // TODO integrate this with evented I/O - if (llvm.TargetMachineEmitToFile( - comp.target_machine, - module, - output_path.span(), - llvm.EmitBinary, - &err_msg, - is_debug, - is_small, - )) { - if (std.debug.runtime_safety) { - std.debug.panic("unable to write object file {}: {s}\n", .{ output_path.span(), err_msg }); + pub fn deinit(self: *Symbol, allocator: *mem.Allocator) void { + for (self.errors) |err| { + allocator.free(err.msg); } - return error.WritingObjectFileFailed; - } - //validate_inline_fns(g); TODO - fn_val.containing_object = output_path; - if (comp.verbose_llvm_ir) { - std.debug.warn("optimized module:\n", .{}); - llvm.DumpModule(ofile.module); - } - if (comp.verbose_link) { - std.debug.warn("created {}\n", .{output_path.span()}); - } -} - -pub const ObjectFile = struct { - comp: *Compilation, - module: *llvm.Module, - builder: *llvm.Builder, - dibuilder: *llvm.DIBuilder, - context: *llvm.Context, - lock: event.Lock, - arena: *std.mem.Allocator, - - fn gpa(self: *ObjectFile) *std.mem.Allocator { - return self.comp.gpa(); + allocator.free(self.errors); + self.* = undefined; } }; -pub fn renderToLlvmModule(ofile: *ObjectFile, fn_val: *Value.Fn, code: *ir.Code) !void { - // TODO audit more of codegen.cpp:fn_llvm_value and port more logic - const llvm_fn_type = try fn_val.base.typ.getLlvmType(ofile.arena, ofile.context); - const llvm_fn = llvm.AddFunction( - ofile.module, - fn_val.symbol_name.span(), - llvm_fn_type, - ) orelse return error.OutOfMemory; +pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !Symbol { + switch (typed_value.ty.zigTypeTag()) { + .Fn => { + const index = typed_value.val.cast(Value.Payload.Function).?.index; + const module_fn = module.fns[index]; - const want_fn_safety = fn_val.block_scope.?.safety.get(ofile.comp); - if (want_fn_safety and ofile.comp.haveLibC()) { - try addLLVMFnAttr(ofile, llvm_fn, "sspstrong"); - try addLLVMFnAttrStr(ofile, llvm_fn, "stack-protector-buffer-size", "4"); - } + var function = Function{ + .module = &module, + .mod_fn = &module_fn, + .code = code, + .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator), + .errors = std.ArrayList(ErrorMsg).init(code.allocator), + .constants = std.ArrayList(ir.TypedValue).init(code.allocator), + }; + defer function.inst_table.deinit(); + defer function.errors.deinit(); - // TODO - //if (fn_val.align_stack) |align_stack| { - // try addLLVMFnAttrInt(ofile, llvm_fn, "alignstack", align_stack); - //} - - const fn_type = fn_val.base.typ.cast(Type.Fn).?; - const fn_type_normal = &fn_type.key.data.Normal; - - try addLLVMFnAttr(ofile, llvm_fn, "nounwind"); - //add_uwtable_attr(g, fn_table_entry->llvm_value); - try addLLVMFnAttr(ofile, llvm_fn, "nobuiltin"); - - //if (g->build_mode == BuildModeDebug && fn_table_entry->fn_inline != FnInlineAlways) { - // ZigLLVMAddFunctionAttr(fn_table_entry->llvm_value, "no-frame-pointer-elim", "true"); - // ZigLLVMAddFunctionAttr(fn_table_entry->llvm_value, "no-frame-pointer-elim-non-leaf", nullptr); - //} - - //if (fn_table_entry->section_name) { - // LLVMSetSection(fn_table_entry->llvm_value, buf_ptr(fn_table_entry->section_name)); - //} - //if (fn_table_entry->align_bytes > 0) { - // LLVMSetAlignment(fn_table_entry->llvm_value, (unsigned)fn_table_entry->align_bytes); - //} else { - // // We'd like to set the best alignment for the function here, but on Darwin LLVM gives - // // "Cannot getTypeInfo() on a type that is unsized!" assertion failure when calling - // // any of the functions for getting alignment. Not specifying the alignment should - // // use the ABI alignment, which is fine. - //} - - //if (!type_has_bits(return_type)) { - // // nothing to do - //} else if (type_is_codegen_pointer(return_type)) { - // addLLVMAttr(fn_table_entry->llvm_value, 0, "nonnull"); - //} else if (handle_is_ptr(return_type) && - // calling_convention_does_first_arg_return(fn_type->data.fn.fn_type_id.cc)) - //{ - // addLLVMArgAttr(fn_table_entry->llvm_value, 0, "sret"); - // addLLVMArgAttr(fn_table_entry->llvm_value, 0, "nonnull"); - //} - - // TODO set parameter attributes - - // TODO - //uint32_t err_ret_trace_arg_index = get_err_ret_trace_arg_index(g, fn_table_entry); - //if (err_ret_trace_arg_index != UINT32_MAX) { - // addLLVMArgAttr(fn_table_entry->llvm_value, (unsigned)err_ret_trace_arg_index, "nonnull"); - //} - - const cur_ret_ptr = if (fn_type_normal.return_type.handleIsPtr()) llvm.GetParam(llvm_fn, 0) else null; - - // build all basic blocks - for (code.basic_block_list.span()) |bb| { - bb.llvm_block = llvm.AppendBasicBlockInContext( - ofile.context, - llvm_fn, - bb.name_hint, - ) orelse return error.OutOfMemory; - } - const entry_bb = code.basic_block_list.at(0); - llvm.PositionBuilderAtEnd(ofile.builder, entry_bb.llvm_block); - - llvm.ClearCurrentDebugLocation(ofile.builder); - - // TODO set up error return tracing - // TODO allocate temporary stack values - - const var_list = fn_type.non_key.Normal.variable_list.span(); - // create debug variable declarations for variables and allocate all local variables - for (var_list) |var_scope, i| { - const var_type = switch (var_scope.data) { - .Const => unreachable, - .Param => |param| param.typ, - }; - // if (!type_has_bits(var->value->type)) { - // continue; - // } - // if (ir_get_var_is_comptime(var)) - // continue; - // if (type_requires_comptime(var->value->type)) - // continue; - // if (var->src_arg_index == SIZE_MAX) { - // var->value_ref = build_alloca(g, var->value->type, buf_ptr(&var->name), var->align_bytes); - - // var->di_loc_var = ZigLLVMCreateAutoVariable(g->dbuilder, get_di_scope(g, var->parent_scope), - // buf_ptr(&var->name), import->di_file, (unsigned)(var->decl_node->line + 1), - // var->value->type->di_type, !g->strip_debug_symbols, 0); - - // } else { - // it's a parameter - // assert(var->gen_arg_index != SIZE_MAX); - // TypeTableEntry *gen_type; - // FnGenParamInfo *gen_info = &fn_table_entry->type_entry->data.fn.gen_param_info[var->src_arg_index]; - - if (var_type.handleIsPtr()) { - // if (gen_info->is_byval) { - // gen_type = var->value->type; - // } else { - // gen_type = gen_info->type; - // } - var_scope.data.Param.llvm_value = llvm.GetParam(llvm_fn, @intCast(c_uint, i)); - } else { - // gen_type = var->value->type; - var_scope.data.Param.llvm_value = try renderAlloca(ofile, var_type, var_scope.name, .Abi); - } - // if (var->decl_node) { - // var->di_loc_var = ZigLLVMCreateParameterVariable(g->dbuilder, get_di_scope(g, var->parent_scope), - // buf_ptr(&var->name), import->di_file, - // (unsigned)(var->decl_node->line + 1), - // gen_type->di_type, !g->strip_debug_symbols, 0, (unsigned)(var->gen_arg_index + 1)); - // } - - // } - } - - // TODO finishing error return trace setup. we have to do this after all the allocas. - - // create debug variable declarations for parameters - // rely on the first variables in the variable_list being parameters. - //size_t next_var_i = 0; - for (fn_type.key.data.Normal.params) |param, i| { - //FnGenParamInfo *info = &fn_table_entry->type_entry->data.fn.gen_param_info[param_i]; - //if (info->gen_index == SIZE_MAX) - // continue; - const scope_var = var_list[i]; - //assert(variable->src_arg_index != SIZE_MAX); - //next_var_i += 1; - //assert(variable); - //assert(variable->value_ref); - - if (!param.typ.handleIsPtr()) { - //clear_debug_source_node(g); - const llvm_param = llvm.GetParam(llvm_fn, @intCast(c_uint, i)); - _ = try renderStoreUntyped( - ofile, - llvm_param, - scope_var.data.Param.llvm_value, - .Abi, - .Non, - ); - } - - //if (variable->decl_node) { - // gen_var_debug_decl(g, variable); - //} - } - - for (code.basic_block_list.span()) |current_block| { - llvm.PositionBuilderAtEnd(ofile.builder, current_block.llvm_block); - for (current_block.instruction_list.span()) |instruction| { - if (instruction.ref_count == 0 and !instruction.hasSideEffects()) continue; - - instruction.llvm_value = try instruction.render(ofile, fn_val); - } - current_block.llvm_exit_block = llvm.GetInsertBlock(ofile.builder); + for (module_fn.body) |inst| { + const new_inst = function.genFuncInst(inst) catch |err| switch (err) { + error.CodegenFail => { + assert(function.errors.items.len != 0); + break; + }, + else => |e| return e, + }; + try function.inst_table.putNoClobber(inst, new_inst); + } + return Symbol{ .errors = function.errors.toOwnedSlice() }; + }, + else => @panic("TODO implement generateSymbol for non-function types"), } } -fn addLLVMAttr( - ofile: *ObjectFile, - val: *llvm.Value, - attr_index: llvm.AttributeIndex, - attr_name: []const u8, -) !void { - const kind_id = llvm.GetEnumAttributeKindForName(attr_name.ptr, attr_name.len); - assert(kind_id != 0); - const llvm_attr = llvm.CreateEnumAttribute(ofile.context, kind_id, 0) orelse return error.OutOfMemory; - llvm.AddAttributeAtIndex(val, attr_index, llvm_attr); -} +const Function = struct { + module: *const ir.Module, + mod_fn: *const ir.Module.Fn, + code: *std.ArrayList(u8), + inst_table: std.AutoHashMap(*ir.Inst, MCValue), + /// Constants are embedded within functions (at the end, after `ret`) + /// so that they are independently updateable. + /// This is a list of constants that must be appended to the symbol after `ret`. + constants: std.ArrayList(ir.TypedValue), + errors: std.ArrayList(ErrorMsg), -fn addLLVMAttrStr( - ofile: *ObjectFile, - val: *llvm.Value, - attr_index: llvm.AttributeIndex, - attr_name: []const u8, - attr_val: []const u8, -) !void { - const llvm_attr = llvm.CreateStringAttribute( - ofile.context, - attr_name.ptr, - @intCast(c_uint, attr_name.len), - attr_val.ptr, - @intCast(c_uint, attr_val.len), - ) orelse return error.OutOfMemory; - llvm.AddAttributeAtIndex(val, attr_index, llvm_attr); -} - -fn addLLVMAttrInt( - val: *llvm.Value, - attr_index: llvm.AttributeIndex, - attr_name: []const u8, - attr_val: u64, -) !void { - const kind_id = llvm.GetEnumAttributeKindForName(attr_name.ptr, attr_name.len); - assert(kind_id != 0); - const llvm_attr = llvm.CreateEnumAttribute(ofile.context, kind_id, attr_val) orelse return error.OutOfMemory; - llvm.AddAttributeAtIndex(val, attr_index, llvm_attr); -} - -fn addLLVMFnAttr(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8) !void { - return addLLVMAttr(ofile, fn_val, maxInt(llvm.AttributeIndex), attr_name); -} - -fn addLLVMFnAttrStr(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8, attr_val: []const u8) !void { - return addLLVMAttrStr(ofile, fn_val, maxInt(llvm.AttributeIndex), attr_name, attr_val); -} - -fn addLLVMFnAttrInt(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8, attr_val: u64) !void { - return addLLVMAttrInt(ofile, fn_val, maxInt(llvm.AttributeIndex), attr_name, attr_val); -} - -fn renderLoadUntyped( - ofile: *ObjectFile, - ptr: *llvm.Value, - alignment: Type.Pointer.Align, - vol: Type.Pointer.Vol, - name: [*:0]const u8, -) !*llvm.Value { - const result = llvm.BuildLoad(ofile.builder, ptr, name) orelse return error.OutOfMemory; - switch (vol) { - .Non => {}, - .Volatile => llvm.SetVolatile(result, 1), - } - llvm.SetAlignment(result, resolveAlign(ofile, alignment, llvm.GetElementType(llvm.TypeOf(ptr)))); - return result; -} - -fn renderLoad(ofile: *ObjectFile, ptr: *llvm.Value, ptr_type: *Type.Pointer, name: [*:0]const u8) !*llvm.Value { - return renderLoadUntyped(ofile, ptr, ptr_type.key.alignment, ptr_type.key.vol, name); -} - -pub fn getHandleValue(ofile: *ObjectFile, ptr: *llvm.Value, ptr_type: *Type.Pointer) !?*llvm.Value { - const child_type = ptr_type.key.child_type; - if (!child_type.hasBits()) { - return null; - } - if (child_type.handleIsPtr()) { - return ptr; - } - return try renderLoad(ofile, ptr, ptr_type, ""); -} - -pub fn renderStoreUntyped( - ofile: *ObjectFile, - value: *llvm.Value, - ptr: *llvm.Value, - alignment: Type.Pointer.Align, - vol: Type.Pointer.Vol, -) !*llvm.Value { - const result = llvm.BuildStore(ofile.builder, value, ptr) orelse return error.OutOfMemory; - switch (vol) { - .Non => {}, - .Volatile => llvm.SetVolatile(result, 1), - } - llvm.SetAlignment(result, resolveAlign(ofile, alignment, llvm.TypeOf(value))); - return result; -} - -pub fn renderStore( - ofile: *ObjectFile, - value: *llvm.Value, - ptr: *llvm.Value, - ptr_type: *Type.Pointer, -) !*llvm.Value { - return renderStoreUntyped(ofile, value, ptr, ptr_type.key.alignment, ptr_type.key.vol); -} - -pub fn renderAlloca( - ofile: *ObjectFile, - var_type: *Type, - name: []const u8, - alignment: Type.Pointer.Align, -) !*llvm.Value { - const llvm_var_type = try var_type.getLlvmType(ofile.arena, ofile.context); - const name_with_null = try std.cstr.addNullByte(ofile.arena, name); - const result = llvm.BuildAlloca(ofile.builder, llvm_var_type, @ptrCast([*:0]const u8, name_with_null.ptr)) orelse return error.OutOfMemory; - llvm.SetAlignment(result, resolveAlign(ofile, alignment, llvm_var_type)); - return result; -} - -pub fn resolveAlign(ofile: *ObjectFile, alignment: Type.Pointer.Align, llvm_type: *llvm.Type) u32 { - return switch (alignment) { - .Abi => return llvm.ABIAlignmentOfType(ofile.comp.target_data_ref, llvm_type), - .Override => |a| a, + const MCValue = union(enum) { + none, + unreach, + /// A pointer-sized integer that fits in a register. + immediate: u64, + /// Refers to the index into `constants` field of `Function`. + local_const_ptr: usize, }; -} + + fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue { + switch (inst.tag) { + .unreach => return self.genPanic(inst.src), + .constant => unreachable, // excluded from function bodies + .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?), + .ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?), + } + } + + fn genPanic(self: *Function, src: usize) !MCValue { + // TODO change this to call the panic function + switch (self.module.target.cpu.arch) { + .i386, .x86_64 => { + try self.code.append(0xcc); // x86 int3 + }, + else => return self.fail(src, "TODO implement panic for {}", .{self.module.target.cpu.arch}), + } + return .unreach; + } + + fn genAsm(self: *Function, inst: *ir.Inst.Assembly) !MCValue { + return self.fail(inst.base.src, "TODO machine code gen assembly", .{}); + } + + fn genPtrToInt(self: *Function, inst: *ir.Inst.PtrToInt) !MCValue { + // no-op + return self.resolveInst(inst.args.ptr); + } + + fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue { + if (inst.cast(ir.Inst.Constant)) |const_inst| { + switch (inst.ty.zigTypeTag()) { + .Int => { + const info = inst.ty.intInfo(self.module.target); + const ptr_bits = self.module.target.cpu.arch.ptrBitWidth(); + if (info.bits > ptr_bits or info.signed) { + return self.fail(inst.src, "TODO const int bigger than ptr and signed int", .{}); + } + return MCValue{ .immediate = const_inst.val.toUnsignedInt() }; + }, + else => return self.fail(inst.src, "TODO implement const of type '{}'", .{inst.ty}), + } + } else { + return self.inst_table.getValue(inst).?; + } + } + + fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } { + @setCold(true); + const msg = try std.fmt.allocPrint(self.errors.allocator, format, args); + { + errdefer self.errors.allocator.free(msg); + (try self.errors.addOne()).* = .{ + .byte_offset = src, + .msg = msg, + }; + } + return error.CodegenFail; + } +}; diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index cea7729642..f3051df088 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -724,7 +724,16 @@ pub fn main() anyerror!void { } const link = @import("link.zig"); - try link.updateExecutableFilePath(allocator, analyzed_module, std.fs.cwd(), "a.out"); + var result = try link.updateExecutableFilePath(allocator, analyzed_module, std.fs.cwd(), "a.out"); + defer result.deinit(allocator); + if (result.errors.len != 0) { + for (result.errors) |err_msg| { + const loc = findLineColumn(source, err_msg.byte_offset); + std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); + } + if (debug_error_trace) return error.ParseFailure; + std.process.exit(1); + } } fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } { diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index f358a4b64d..4f89786b3c 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -5,15 +5,38 @@ const Allocator = std.mem.Allocator; const ir = @import("ir.zig"); const fs = std.fs; const elf = std.elf; +const codegen = @import("codegen.zig"); const executable_mode = 0o755; const default_entry_addr = 0x8000000; +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, +}; + +pub const Result = struct { + errors: []ErrorMsg, + + pub fn deinit(self: *Result, allocator: *mem.Allocator) void { + for (self.errors) |err| { + allocator.free(err.msg); + } + allocator.free(self.errors); + self.* = undefined; + } +}; + /// Attempts incremental linking, if the file already exists. /// If incremental linking fails, falls back to truncating the file and rewriting it. /// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. /// This operation is not atomic. -pub fn updateExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void { +pub fn updateExecutableFilePath( + allocator: *Allocator, + module: ir.Module, + dir: fs.Dir, + sub_path: []const u8, +) !Result { const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = executable_mode }); defer file.close(); @@ -21,12 +44,18 @@ pub fn updateExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: f } /// Atomically overwrites the old file, if present. -pub fn writeExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void { +pub fn writeExecutableFilePath( + allocator: *Allocator, + module: ir.Module, + dir: fs.Dir, + sub_path: []const u8, +) !Result { const af = try dir.atomicFile(sub_path, .{ .mode = executable_mode }); defer af.deinit(); - try writeExecutableFile(allocator, module, af.file); + const result = try writeExecutableFile(allocator, module, af.file); try af.finish(); + return result; } /// Attempts incremental linking, if the file already exists. @@ -34,8 +63,8 @@ pub fn writeExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs /// Returns an error if `file` is not already open with +read +write +seek abilities. /// A malicious file is detected as incremental link failure and does not cause Illegal Behavior. /// This operation is not atomic. -pub fn updateExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void { - updateExecutableFileInner(allocator, module, file) catch |err| switch (err) { +pub fn updateExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { + return updateExecutableFileInner(allocator, module, file) catch |err| switch (err) { error.IncrFailed => { return writeExecutableFile(allocator, module, file); }, @@ -66,20 +95,17 @@ const Update = struct { text_section_index: ?u16, symtab_section_index: ?u16, - /// Key: index into strtab. Value: index into symbols. - symbol_table: std.AutoHashMap(usize, usize), /// The same order as in the file symbols: std.ArrayList(elf.Elf64_Sym), - /// Sorted by address, index into symbols - symbols_by_addr: std.ArrayList(usize), + + errors: std.ArrayList(ErrorMsg), fn deinit(self: *Update) void { self.sections.deinit(); self.program_headers.deinit(); self.shstrtab.deinit(); - self.symbol_table.deinit(); self.symbols.deinit(); - self.symbols_by_addr.deinit(); + self.errors.deinit(); } // `expand_num / expand_den` is the factor of padding when allocation @@ -162,6 +188,7 @@ const Update = struct { fn makeString(self: *Update, bytes: []const u8) !u32 { const result = self.shstrtab.items.len; try self.shstrtab.appendSlice(bytes); + try self.shstrtab.append(0); return @intCast(u32, result); } @@ -187,6 +214,7 @@ const Update = struct { const file_size = 256 * 1024; const p_align = 0x1000; const off = self.findFreeSpace(file_size, p_align); + //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); try self.program_headers.append(.{ .p_type = elf.PT_LOAD, .p_offset = off, @@ -194,10 +222,10 @@ const Update = struct { .p_vaddr = default_entry_addr, .p_paddr = default_entry_addr, .p_memsz = 0, - .p_align = 0x1000, + .p_align = p_align, .p_flags = elf.PF_X | elf.PF_R, }); - self.entry_addr = default_entry_addr; + self.entry_addr = null; phdr_load_re_dirty = true; phdr_table_dirty = true; } @@ -220,6 +248,7 @@ const Update = struct { if (self.shstrtab_index == null) { self.shstrtab_index = @intCast(u16, self.sections.items.len); const off = self.findFreeSpace(self.shstrtab.items.len, 1); + //std.debug.warn("found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len }); try self.sections.append(.{ .sh_name = try self.makeString(".shstrtab"), .sh_type = elf.SHT_STRTAB, @@ -259,6 +288,7 @@ const Update = struct { const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); const file_size = self.module.exports.len * each_size; const off = self.findFreeSpace(file_size, min_align); + //std.debug.warn("found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); try self.sections.append(.{ .sh_name = try self.makeString(".symtab"), @@ -307,6 +337,7 @@ const Update = struct { const needed_size = self.program_headers.items.len * phsize; if (needed_size > allocated_size) { + self.phdr_table_offset = null; // free the space self.phdr_table_offset = self.findFreeSpace(needed_size, phalign); } @@ -361,6 +392,7 @@ const Update = struct { const needed_size = self.sections.items.len * phsize; if (needed_size > allocated_size) { + self.shdr_table_offset = null; // free the space self.shdr_table_offset = self.findFreeSpace(needed_size, phalign); } @@ -414,11 +446,30 @@ const Update = struct { }, } } - if (shstrtab_dirty) { - try self.file.pwriteAll(self.shstrtab.items, self.sections.items[self.shstrtab_index.?].sh_offset); - } try self.writeCodeAndSymbols(); - try self.writeElfHeader(); + + const shstrtab_sect = &self.sections.items[self.shstrtab_index.?]; + if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) { + const allocated_size = self.allocatedSize(shstrtab_sect.sh_offset); + const needed_size = self.shstrtab.items.len; + + if (needed_size > allocated_size) { + shstrtab_sect.sh_size = 0; // free the space + shstrtab_sect.sh_offset = self.findFreeSpace(needed_size, 1); + shstrtab_sect.sh_size = needed_size; + } + try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset); + } + if (self.entry_addr == null) { + const msg = try std.fmt.allocPrint(self.errors.allocator, "no entry point found", .{}); + errdefer self.errors.allocator.free(msg); + try self.errors.append(.{ + .byte_offset = 0, + .msg = msg, + }); + } else { + try self.writeElfHeader(); + } // TODO find end pos and truncate } @@ -540,13 +591,122 @@ const Update = struct { } fn writeCodeAndSymbols(self: *Update) !void { - @panic("TODO writeCodeAndSymbols"); + // index 0 is always a null symbol + try self.symbols.resize(1); + self.symbols.items[0] = .{ + .st_name = 0, + .st_info = 0, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = 0, + }; + + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + var vaddr: u64 = phdr.p_vaddr; + + var code = std.ArrayList(u8).init(self.sections.allocator); + defer code.deinit(); + + for (self.module.exports) |exp| { + code.shrink(0); + var symbol = try codegen.generateSymbol(exp.typed_value, self.module.*, &code); + defer symbol.deinit(code.allocator); + if (symbol.errors.len != 0) { + for (symbol.errors) |err| { + const msg = try mem.dupe(self.errors.allocator, u8, err.msg); + errdefer self.errors.allocator.free(msg); + try self.errors.append(.{ + .byte_offset = err.byte_offset, + .msg = msg, + }); + } + continue; + } + + if (mem.eql(u8, exp.name, "_start")) { + self.entry_addr = vaddr; + } + (try self.symbols.addOne()).* = .{ + .st_name = try self.makeString(exp.name), + .st_info = (elf.STB_LOCAL << 4) | elf.STT_FUNC, + .st_other = 0, + .st_shndx = self.text_section_index.?, + .st_value = vaddr, + .st_size = code.items.len, + }; + vaddr += code.items.len; + } + + return self.writeSymbols(); + } + + fn writeSymbols(self: *Update) !void { + const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) { + 32 => .p32, + 64 => .p64, + else => return error.UnsupportedArchitecture, + }; + const small_ptr = ptr_width == .p32; + const syms_sect = &self.sections.items[self.symtab_section_index.?]; + const sym_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); + const sym_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); + + const allocated_size = self.allocatedSize(syms_sect.sh_offset); + const needed_size = self.symbols.items.len * sym_size; + if (needed_size > allocated_size) { + syms_sect.sh_size = 0; // free the space + syms_sect.sh_offset = self.findFreeSpace(needed_size, sym_align); + syms_sect.sh_size = needed_size; + } + const allocator = self.symbols.allocator; + const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + switch (ptr_width) { + .p32 => { + const buf = try allocator.alloc(elf.Elf32_Sym, self.symbols.items.len); + defer allocator.free(buf); + + for (buf) |*sym, i| { + sym.* = .{ + .st_name = self.symbols.items[i].st_name, + .st_value = @intCast(u32, self.symbols.items[i].st_value), + .st_size = @intCast(u32, self.symbols.items[i].st_size), + .st_info = self.symbols.items[i].st_info, + .st_other = self.symbols.items[i].st_other, + .st_shndx = self.symbols.items[i].st_shndx, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf32_Sym, sym); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + }, + .p64 => { + const buf = try allocator.alloc(elf.Elf64_Sym, self.symbols.items.len); + defer allocator.free(buf); + + for (buf) |*sym, i| { + sym.* = .{ + .st_name = self.symbols.items[i].st_name, + .st_value = self.symbols.items[i].st_value, + .st_size = self.symbols.items[i].st_size, + .st_info = self.symbols.items[i].st_info, + .st_other = self.symbols.items[i].st_other, + .st_shndx = self.symbols.items[i].st_shndx, + }; + if (foreign_endian) { + bswapAllFields(elf.Elf64_Sym, sym); + } + } + try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset); + }, + } } }; /// Truncates the existing file contents and overwrites the contents. /// Returns an error if `file` is not already open with +read +write +seek abilities. -pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void { +pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { var update = Update{ .file = file, .module = &module, @@ -561,17 +721,20 @@ pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.Fi .text_section_index = null, .symtab_section_index = null, - .symbol_table = std.AutoHashMap(usize, usize).init(allocator), .symbols = std.ArrayList(elf.Elf64_Sym).init(allocator), - .symbols_by_addr = std.ArrayList(usize).init(allocator), + + .errors = std.ArrayList(ErrorMsg).init(allocator), }; defer update.deinit(); - return update.perform(); + try update.perform(); + return Result{ + .errors = update.errors.toOwnedSlice(), + }; } /// Returns error.IncrFailed if incremental update could not be performed. -fn updateExecutableFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !void { +fn updateExecutableFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !Result { //var ehdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined; // TODO implement incremental linking diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 03cda71387..aaa6f0867a 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -264,6 +264,56 @@ pub const Value = extern union { } } + /// Asserts the value is an integer and it fits in a u64 + pub fn toUnsignedInt(self: Value) u64 { + switch (self.tag()) { + .ty, + .u8_type, + .i8_type, + .isize_type, + .usize_type, + .c_short_type, + .c_ushort_type, + .c_int_type, + .c_uint_type, + .c_long_type, + .c_ulong_type, + .c_longlong_type, + .c_ulonglong_type, + .c_longdouble_type, + .f16_type, + .f32_type, + .f64_type, + .f128_type, + .c_void_type, + .bool_type, + .void_type, + .type_type, + .anyerror_type, + .comptime_int_type, + .comptime_float_type, + .noreturn_type, + .fn_naked_noreturn_no_args_type, + .single_const_pointer_to_comptime_int_type, + .const_slice_u8_type, + .void_value, + .noreturn_value, + .bool_true, + .bool_false, + .function, + .ref, + .ref_val, + .bytes, + => unreachable, + + .zero => return 0, + + .int_u64 => return self.cast(Payload.Int_u64).?.int, + .int_i64 => return @intCast(u64, self.cast(Payload.Int_u64).?.int), + .int_big => return self.cast(Payload.IntBig).?.big_int.to(u64) catch unreachable, + } + } + /// Asserts the value is an integer, and the destination type is ComptimeInt or Int. pub fn intFitsInType(self: Value, ty: Type, target: Target) bool { switch (self.tag()) {