basics of writing ELF and machine code generation

This commit is contained in:
Andrew Kelley 2020-04-23 16:41:20 -04:00
parent a3dfe36ca1
commit 24a01eed90
4 changed files with 367 additions and 455 deletions

View File

@ -1,447 +1,137 @@
const std = @import("std");
const Compilation = @import("compilation.zig").Compilation;
const llvm = @import("llvm.zig");
const c = @import("c.zig");
const ir = @import("ir.zig");
const Value = @import("value.zig").Value;
const Type = @import("type.zig").Type;
const Scope = @import("scope.zig").Scope;
const util = @import("util.zig");
const event = std.event;
const mem = std.mem;
const assert = std.debug.assert;
const DW = std.dwarf;
const maxInt = std.math.maxInt;
const ir = @import("ir.zig");
const Type = @import("type.zig").Type;
const Value = @import("value.zig").Value;
pub async fn renderToLlvm(comp: *Compilation, fn_val: *Value.Fn, code: *ir.Code) Compilation.BuildError!void {
fn_val.base.ref();
defer fn_val.base.deref(comp);
defer code.destroy(comp.gpa());
/// An error message recorded during code generation (see `Function.fail`).
pub const ErrorMsg = struct {
/// Location the message refers to; `Function.fail` stores the instruction's `src` here.
/// Presumably a byte offset into the source text — TODO confirm.
byte_offset: usize,
/// Message text. `Symbol.deinit` frees it with the allocator it is given.
msg: []const u8,
};
var output_path = try comp.createRandomOutputPath(comp.target.oFileExt());
errdefer output_path.deinit();
pub const Symbol = struct {
errors: []ErrorMsg,
const llvm_handle = try comp.zig_compiler.getAnyLlvmContext();
defer llvm_handle.release(comp.zig_compiler);
const context = llvm_handle.node.data;
const module = llvm.ModuleCreateWithNameInContext(comp.name.span(), context) orelse return error.OutOfMemory;
defer llvm.DisposeModule(module);
llvm.SetTarget(module, comp.llvm_triple.span());
llvm.SetDataLayout(module, comp.target_layout_str);
if (comp.target.getObjectFormat() == .coff) {
llvm.AddModuleCodeViewFlag(module);
} else {
llvm.AddModuleDebugInfoFlag(module);
}
const builder = llvm.CreateBuilderInContext(context) orelse return error.OutOfMemory;
defer llvm.DisposeBuilder(builder);
const dibuilder = llvm.CreateDIBuilder(module, true) orelse return error.OutOfMemory;
defer llvm.DisposeDIBuilder(dibuilder);
// Don't use ZIG_VERSION_STRING here. LLVM misparses it when it includes
// the git revision.
const producer = try std.fmt.allocPrintZ(&code.arena.allocator, "zig {}.{}.{}", .{
@as(u32, c.ZIG_VERSION_MAJOR),
@as(u32, c.ZIG_VERSION_MINOR),
@as(u32, c.ZIG_VERSION_PATCH),
});
const flags = "";
const runtime_version = 0;
const compile_unit_file = llvm.CreateFile(
dibuilder,
comp.name.span(),
comp.root_package.root_src_dir.span(),
) orelse return error.OutOfMemory;
const is_optimized = comp.build_mode != .Debug;
const compile_unit = llvm.CreateCompileUnit(
dibuilder,
DW.LANG_C99,
compile_unit_file,
producer,
is_optimized,
flags,
runtime_version,
"",
0,
!comp.strip,
) orelse return error.OutOfMemory;
var ofile = ObjectFile{
.comp = comp,
.module = module,
.builder = builder,
.dibuilder = dibuilder,
.context = context,
.lock = event.Lock.init(),
.arena = &code.arena.allocator,
};
try renderToLlvmModule(&ofile, fn_val, code);
// TODO module level assembly
//if (buf_len(&g->global_asm) != 0) {
// LLVMSetModuleInlineAsm(g->module, buf_ptr(&g->global_asm));
//}
llvm.DIBuilderFinalize(dibuilder);
if (comp.verbose_llvm_ir) {
std.debug.warn("raw module:\n", .{});
llvm.DumpModule(ofile.module);
}
// verify the llvm module when safety is on
if (std.debug.runtime_safety) {
var error_ptr: ?[*:0]u8 = null;
_ = llvm.VerifyModule(ofile.module, llvm.AbortProcessAction, &error_ptr);
}
const is_small = comp.build_mode == .ReleaseSmall;
const is_debug = comp.build_mode == .Debug;
var err_msg: [*:0]u8 = undefined;
// TODO integrate this with evented I/O
if (llvm.TargetMachineEmitToFile(
comp.target_machine,
module,
output_path.span(),
llvm.EmitBinary,
&err_msg,
is_debug,
is_small,
)) {
if (std.debug.runtime_safety) {
std.debug.panic("unable to write object file {}: {s}\n", .{ output_path.span(), err_msg });
pub fn deinit(self: *Symbol, allocator: *mem.Allocator) void {
for (self.errors) |err| {
allocator.free(err.msg);
}
return error.WritingObjectFileFailed;
}
//validate_inline_fns(g); TODO
fn_val.containing_object = output_path;
if (comp.verbose_llvm_ir) {
std.debug.warn("optimized module:\n", .{});
llvm.DumpModule(ofile.module);
}
if (comp.verbose_link) {
std.debug.warn("created {}\n", .{output_path.span()});
}
}
pub const ObjectFile = struct {
comp: *Compilation,
module: *llvm.Module,
builder: *llvm.Builder,
dibuilder: *llvm.DIBuilder,
context: *llvm.Context,
lock: event.Lock,
arena: *std.mem.Allocator,
fn gpa(self: *ObjectFile) *std.mem.Allocator {
return self.comp.gpa();
allocator.free(self.errors);
self.* = undefined;
}
};
pub fn renderToLlvmModule(ofile: *ObjectFile, fn_val: *Value.Fn, code: *ir.Code) !void {
// TODO audit more of codegen.cpp:fn_llvm_value and port more logic
const llvm_fn_type = try fn_val.base.typ.getLlvmType(ofile.arena, ofile.context);
const llvm_fn = llvm.AddFunction(
ofile.module,
fn_val.symbol_name.span(),
llvm_fn_type,
) orelse return error.OutOfMemory;
pub fn generateSymbol(typed_value: ir.TypedValue, module: ir.Module, code: *std.ArrayList(u8)) !Symbol {
switch (typed_value.ty.zigTypeTag()) {
.Fn => {
const index = typed_value.val.cast(Value.Payload.Function).?.index;
const module_fn = module.fns[index];
const want_fn_safety = fn_val.block_scope.?.safety.get(ofile.comp);
if (want_fn_safety and ofile.comp.haveLibC()) {
try addLLVMFnAttr(ofile, llvm_fn, "sspstrong");
try addLLVMFnAttrStr(ofile, llvm_fn, "stack-protector-buffer-size", "4");
}
var function = Function{
.module = &module,
.mod_fn = &module_fn,
.code = code,
.inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(code.allocator),
.errors = std.ArrayList(ErrorMsg).init(code.allocator),
.constants = std.ArrayList(ir.TypedValue).init(code.allocator),
};
defer function.inst_table.deinit();
defer function.errors.deinit();
// TODO
//if (fn_val.align_stack) |align_stack| {
// try addLLVMFnAttrInt(ofile, llvm_fn, "alignstack", align_stack);
//}
const fn_type = fn_val.base.typ.cast(Type.Fn).?;
const fn_type_normal = &fn_type.key.data.Normal;
try addLLVMFnAttr(ofile, llvm_fn, "nounwind");
//add_uwtable_attr(g, fn_table_entry->llvm_value);
try addLLVMFnAttr(ofile, llvm_fn, "nobuiltin");
//if (g->build_mode == BuildModeDebug && fn_table_entry->fn_inline != FnInlineAlways) {
// ZigLLVMAddFunctionAttr(fn_table_entry->llvm_value, "no-frame-pointer-elim", "true");
// ZigLLVMAddFunctionAttr(fn_table_entry->llvm_value, "no-frame-pointer-elim-non-leaf", nullptr);
//}
//if (fn_table_entry->section_name) {
// LLVMSetSection(fn_table_entry->llvm_value, buf_ptr(fn_table_entry->section_name));
//}
//if (fn_table_entry->align_bytes > 0) {
// LLVMSetAlignment(fn_table_entry->llvm_value, (unsigned)fn_table_entry->align_bytes);
//} else {
// // We'd like to set the best alignment for the function here, but on Darwin LLVM gives
// // "Cannot getTypeInfo() on a type that is unsized!" assertion failure when calling
// // any of the functions for getting alignment. Not specifying the alignment should
// // use the ABI alignment, which is fine.
//}
//if (!type_has_bits(return_type)) {
// // nothing to do
//} else if (type_is_codegen_pointer(return_type)) {
// addLLVMAttr(fn_table_entry->llvm_value, 0, "nonnull");
//} else if (handle_is_ptr(return_type) &&
// calling_convention_does_first_arg_return(fn_type->data.fn.fn_type_id.cc))
//{
// addLLVMArgAttr(fn_table_entry->llvm_value, 0, "sret");
// addLLVMArgAttr(fn_table_entry->llvm_value, 0, "nonnull");
//}
// TODO set parameter attributes
// TODO
//uint32_t err_ret_trace_arg_index = get_err_ret_trace_arg_index(g, fn_table_entry);
//if (err_ret_trace_arg_index != UINT32_MAX) {
// addLLVMArgAttr(fn_table_entry->llvm_value, (unsigned)err_ret_trace_arg_index, "nonnull");
//}
const cur_ret_ptr = if (fn_type_normal.return_type.handleIsPtr()) llvm.GetParam(llvm_fn, 0) else null;
// build all basic blocks
for (code.basic_block_list.span()) |bb| {
bb.llvm_block = llvm.AppendBasicBlockInContext(
ofile.context,
llvm_fn,
bb.name_hint,
) orelse return error.OutOfMemory;
}
const entry_bb = code.basic_block_list.at(0);
llvm.PositionBuilderAtEnd(ofile.builder, entry_bb.llvm_block);
llvm.ClearCurrentDebugLocation(ofile.builder);
// TODO set up error return tracing
// TODO allocate temporary stack values
const var_list = fn_type.non_key.Normal.variable_list.span();
// create debug variable declarations for variables and allocate all local variables
for (var_list) |var_scope, i| {
const var_type = switch (var_scope.data) {
.Const => unreachable,
.Param => |param| param.typ,
};
// if (!type_has_bits(var->value->type)) {
// continue;
// }
// if (ir_get_var_is_comptime(var))
// continue;
// if (type_requires_comptime(var->value->type))
// continue;
// if (var->src_arg_index == SIZE_MAX) {
// var->value_ref = build_alloca(g, var->value->type, buf_ptr(&var->name), var->align_bytes);
// var->di_loc_var = ZigLLVMCreateAutoVariable(g->dbuilder, get_di_scope(g, var->parent_scope),
// buf_ptr(&var->name), import->di_file, (unsigned)(var->decl_node->line + 1),
// var->value->type->di_type, !g->strip_debug_symbols, 0);
// } else {
// it's a parameter
// assert(var->gen_arg_index != SIZE_MAX);
// TypeTableEntry *gen_type;
// FnGenParamInfo *gen_info = &fn_table_entry->type_entry->data.fn.gen_param_info[var->src_arg_index];
if (var_type.handleIsPtr()) {
// if (gen_info->is_byval) {
// gen_type = var->value->type;
// } else {
// gen_type = gen_info->type;
// }
var_scope.data.Param.llvm_value = llvm.GetParam(llvm_fn, @intCast(c_uint, i));
} else {
// gen_type = var->value->type;
var_scope.data.Param.llvm_value = try renderAlloca(ofile, var_type, var_scope.name, .Abi);
}
// if (var->decl_node) {
// var->di_loc_var = ZigLLVMCreateParameterVariable(g->dbuilder, get_di_scope(g, var->parent_scope),
// buf_ptr(&var->name), import->di_file,
// (unsigned)(var->decl_node->line + 1),
// gen_type->di_type, !g->strip_debug_symbols, 0, (unsigned)(var->gen_arg_index + 1));
// }
// }
}
// TODO finishing error return trace setup. we have to do this after all the allocas.
// create debug variable declarations for parameters
// rely on the first variables in the variable_list being parameters.
//size_t next_var_i = 0;
for (fn_type.key.data.Normal.params) |param, i| {
//FnGenParamInfo *info = &fn_table_entry->type_entry->data.fn.gen_param_info[param_i];
//if (info->gen_index == SIZE_MAX)
// continue;
const scope_var = var_list[i];
//assert(variable->src_arg_index != SIZE_MAX);
//next_var_i += 1;
//assert(variable);
//assert(variable->value_ref);
if (!param.typ.handleIsPtr()) {
//clear_debug_source_node(g);
const llvm_param = llvm.GetParam(llvm_fn, @intCast(c_uint, i));
_ = try renderStoreUntyped(
ofile,
llvm_param,
scope_var.data.Param.llvm_value,
.Abi,
.Non,
);
}
//if (variable->decl_node) {
// gen_var_debug_decl(g, variable);
//}
}
for (code.basic_block_list.span()) |current_block| {
llvm.PositionBuilderAtEnd(ofile.builder, current_block.llvm_block);
for (current_block.instruction_list.span()) |instruction| {
if (instruction.ref_count == 0 and !instruction.hasSideEffects()) continue;
instruction.llvm_value = try instruction.render(ofile, fn_val);
}
current_block.llvm_exit_block = llvm.GetInsertBlock(ofile.builder);
for (module_fn.body) |inst| {
const new_inst = function.genFuncInst(inst) catch |err| switch (err) {
error.CodegenFail => {
assert(function.errors.items.len != 0);
break;
},
else => |e| return e,
};
try function.inst_table.putNoClobber(inst, new_inst);
}
return Symbol{ .errors = function.errors.toOwnedSlice() };
},
else => @panic("TODO implement generateSymbol for non-function types"),
}
}
/// Attaches the enum attribute named `attr_name` (with value 0) to `val` at
/// position `attr_index`.
///
/// Asserts that LLVM reports a nonzero kind id for `attr_name`.
/// Returns `error.OutOfMemory` when LLVM fails to create the attribute.
fn addLLVMAttr(
    ofile: *ObjectFile,
    val: *llvm.Value,
    attr_index: llvm.AttributeIndex,
    attr_name: []const u8,
) !void {
    const attr_kind = llvm.GetEnumAttributeKindForName(attr_name.ptr, attr_name.len);
    assert(attr_kind != 0);
    if (llvm.CreateEnumAttribute(ofile.context, attr_kind, 0)) |attribute| {
        llvm.AddAttributeAtIndex(val, attr_index, attribute);
    } else {
        return error.OutOfMemory;
    }
}
const Function = struct {
module: *const ir.Module,
mod_fn: *const ir.Module.Fn,
code: *std.ArrayList(u8),
inst_table: std.AutoHashMap(*ir.Inst, MCValue),
/// Constants are embedded within functions (at the end, after `ret`)
/// so that they are independently updateable.
/// This is a list of constants that must be appended to the symbol after `ret`.
constants: std.ArrayList(ir.TypedValue),
errors: std.ArrayList(ErrorMsg),
/// Attaches a string attribute `attr_name = attr_val` to `val` at position
/// `attr_index`.
///
/// Both lengths are converted to `c_uint` with `@intCast`.
/// Returns `error.OutOfMemory` when LLVM fails to create the attribute.
fn addLLVMAttrStr(
    ofile: *ObjectFile,
    val: *llvm.Value,
    attr_index: llvm.AttributeIndex,
    attr_name: []const u8,
    attr_val: []const u8,
) !void {
    const name_len = @intCast(c_uint, attr_name.len);
    const value_len = @intCast(c_uint, attr_val.len);
    if (llvm.CreateStringAttribute(ofile.context, attr_name.ptr, name_len, attr_val.ptr, value_len)) |attribute| {
        llvm.AddAttributeAtIndex(val, attr_index, attribute);
    } else {
        return error.OutOfMemory;
    }
}
/// Attaches the enum attribute named `attr_name`, carrying the integer
/// `attr_val`, to `val` at position `attr_index`.
///
/// `ofile` supplies the LLVM context the attribute is created in. The
/// parameter was missing from the signature even though the body reads
/// `ofile.context` and `addLLVMFnAttrInt` passes `ofile` as the first
/// argument; it is declared here so the function matches its sibling
/// `addLLVMAttr` and its caller.
///
/// Asserts that LLVM reports a nonzero kind id for `attr_name`.
/// Returns `error.OutOfMemory` when LLVM fails to create the attribute.
fn addLLVMAttrInt(
    ofile: *ObjectFile,
    val: *llvm.Value,
    attr_index: llvm.AttributeIndex,
    attr_name: []const u8,
    attr_val: u64,
) !void {
    const kind_id = llvm.GetEnumAttributeKindForName(attr_name.ptr, attr_name.len);
    assert(kind_id != 0);
    const llvm_attr = llvm.CreateEnumAttribute(ofile.context, kind_id, attr_val) orelse return error.OutOfMemory;
    llvm.AddAttributeAtIndex(val, attr_index, llvm_attr);
}
/// Adds the enum attribute `attr_name` to `fn_val`, using the maximum
/// `llvm.AttributeIndex` value as the attribute position.
fn addLLVMFnAttr(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8) !void {
    const fn_attr_index = maxInt(llvm.AttributeIndex);
    return addLLVMAttr(ofile, fn_val, fn_attr_index, attr_name);
}
/// Adds the string attribute `attr_name = attr_val` to `fn_val`, using the
/// maximum `llvm.AttributeIndex` value as the attribute position.
fn addLLVMFnAttrStr(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8, attr_val: []const u8) !void {
    const fn_attr_index = maxInt(llvm.AttributeIndex);
    return addLLVMAttrStr(ofile, fn_val, fn_attr_index, attr_name, attr_val);
}
/// Adds the integer-valued enum attribute `attr_name` to `fn_val`, using the
/// maximum `llvm.AttributeIndex` value as the attribute position.
fn addLLVMFnAttrInt(ofile: *ObjectFile, fn_val: *llvm.Value, attr_name: []const u8, attr_val: u64) !void {
    const fn_attr_index = maxInt(llvm.AttributeIndex);
    return addLLVMAttrInt(ofile, fn_val, fn_attr_index, attr_name, attr_val);
}
/// Emits a load from `ptr` named `name`, marks it volatile when `vol` is
/// `.Volatile`, and sets its alignment from `alignment` resolved against the
/// element type of `ptr`'s LLVM type.
///
/// Returns the load instruction, or `error.OutOfMemory` when LLVM fails to
/// build it.
fn renderLoadUntyped(
    ofile: *ObjectFile,
    ptr: *llvm.Value,
    alignment: Type.Pointer.Align,
    vol: Type.Pointer.Vol,
    name: [*:0]const u8,
) !*llvm.Value {
    const load = llvm.BuildLoad(ofile.builder, ptr, name) orelse return error.OutOfMemory;
    switch (vol) {
        .Volatile => llvm.SetVolatile(load, 1),
        .Non => {},
    }
    const pointee_type = llvm.GetElementType(llvm.TypeOf(ptr));
    const resolved_align = resolveAlign(ofile, alignment, pointee_type);
    llvm.SetAlignment(load, resolved_align);
    return load;
}
/// Emits a load from `ptr`, taking alignment and volatility from the key of
/// `ptr_type`.
fn renderLoad(ofile: *ObjectFile, ptr: *llvm.Value, ptr_type: *Type.Pointer, name: [*:0]const u8) !*llvm.Value {
    const key = &ptr_type.key;
    return renderLoadUntyped(ofile, ptr, key.alignment, key.vol, name);
}
/// Produces the value used to handle what `ptr` points at:
/// - `null` when the child type of `ptr_type` reports no bits,
/// - `ptr` itself when the child type reports `handleIsPtr()`,
/// - otherwise a freshly emitted (unnamed) load from `ptr`.
pub fn getHandleValue(ofile: *ObjectFile, ptr: *llvm.Value, ptr_type: *Type.Pointer) !?*llvm.Value {
    const pointee = ptr_type.key.child_type;
    if (!pointee.hasBits()) return null;
    if (!pointee.handleIsPtr()) {
        return try renderLoad(ofile, ptr, ptr_type, "");
    }
    return ptr;
}
/// Emits a store of `value` to `ptr`, marks it volatile when `vol` is
/// `.Volatile`, and sets its alignment from `alignment` resolved against the
/// LLVM type of `value`.
///
/// Returns the store instruction, or `error.OutOfMemory` when LLVM fails to
/// build it.
pub fn renderStoreUntyped(
    ofile: *ObjectFile,
    value: *llvm.Value,
    ptr: *llvm.Value,
    alignment: Type.Pointer.Align,
    vol: Type.Pointer.Vol,
) !*llvm.Value {
    const store = llvm.BuildStore(ofile.builder, value, ptr) orelse return error.OutOfMemory;
    switch (vol) {
        .Volatile => llvm.SetVolatile(store, 1),
        .Non => {},
    }
    const value_type = llvm.TypeOf(value);
    const resolved_align = resolveAlign(ofile, alignment, value_type);
    llvm.SetAlignment(store, resolved_align);
    return store;
}
/// Emits a store of `value` to `ptr`, taking alignment and volatility from
/// the key of `ptr_type`.
pub fn renderStore(
    ofile: *ObjectFile,
    value: *llvm.Value,
    ptr: *llvm.Value,
    ptr_type: *Type.Pointer,
) !*llvm.Value {
    const key = &ptr_type.key;
    return renderStoreUntyped(ofile, value, ptr, key.alignment, key.vol);
}
/// Emits an `alloca` for a variable of type `var_type` called `name`, with
/// its alignment resolved from `alignment` against the variable's LLVM type.
///
/// The LLVM type and the null-terminated copy of `name` are allocated from
/// `ofile.arena`. Returns `error.OutOfMemory` when LLVM fails to build the
/// instruction; errors from the two helper calls are propagated.
pub fn renderAlloca(
    ofile: *ObjectFile,
    var_type: *Type,
    name: []const u8,
    alignment: Type.Pointer.Align,
) !*llvm.Value {
    const llvm_type = try var_type.getLlvmType(ofile.arena, ofile.context);
    const name_z = try std.cstr.addNullByte(ofile.arena, name);
    const name_ptr = @ptrCast([*:0]const u8, name_z.ptr);
    const alloca = llvm.BuildAlloca(ofile.builder, llvm_type, name_ptr) orelse return error.OutOfMemory;
    llvm.SetAlignment(alloca, resolveAlign(ofile, alignment, llvm_type));
    return alloca;
}
pub fn resolveAlign(ofile: *ObjectFile, alignment: Type.Pointer.Align, llvm_type: *llvm.Type) u32 {
return switch (alignment) {
.Abi => return llvm.ABIAlignmentOfType(ofile.comp.target_data_ref, llvm_type),
.Override => |a| a,
/// Result of generating machine code for one instruction; this is the value
/// type stored in `inst_table`.
const MCValue = union(enum) {
/// NOTE(review): not produced by any code visible here — presumably for
/// instructions that yield no value; confirm.
none,
/// Returned by `genPanic`. Presumably marks that control flow does not
/// continue past the instruction — confirm.
unreach,
/// A pointer-sized integer that fits in a register.
immediate: u64,
/// Refers to the index into `constants` field of `Function`.
local_const_ptr: usize,
};
}
/// Generates machine code for one instruction of the function body by
/// dispatching on its tag, and returns the `MCValue` for its result.
fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue {
    switch (inst.tag) {
        .assembly => {
            const asm_inst = inst.cast(ir.Inst.Assembly).?;
            return self.genAsm(asm_inst);
        },
        .ptrtoint => {
            const ptrtoint_inst = inst.cast(ir.Inst.PtrToInt).?;
            return self.genPtrToInt(ptrtoint_inst);
        },
        .unreach => return self.genPanic(inst.src),
        // Constants are excluded from function bodies, so this tag never arrives here.
        .constant => unreachable,
    }
}
/// Emits a panic sequence for the target architecture and returns `.unreach`.
///
/// On i386 and x86_64 this appends the single byte 0xcc (`int3`) to the code
/// buffer; every other architecture records a TODO error at `src` via `fail`.
fn genPanic(self: *Function, src: usize) !MCValue {
    // TODO change this to call the panic function
    const arch = self.module.target.cpu.arch;
    switch (arch) {
        // x86 int3
        .i386, .x86_64 => try self.code.append(0xcc),
        else => return self.fail(src, "TODO implement panic for {}", .{arch}),
    }
    return .unreach;
}
/// Inline assembly is not implemented yet: records a TODO error at the
/// instruction's source location and fails with `error.CodegenFail`.
fn genAsm(self: *Function, inst: *ir.Inst.Assembly) !MCValue {
    const src = inst.base.src;
    return self.fail(src, "TODO machine code gen assembly", .{});
}
/// `ptrtoint` emits no machine code: the result is simply the resolved value
/// of the pointer operand.
fn genPtrToInt(self: *Function, inst: *ir.Inst.PtrToInt) !MCValue {
    const operand = inst.args.ptr;
    return self.resolveInst(operand);
}
/// Returns the `MCValue` for `inst`.
///
/// Non-constant instructions are looked up in `inst_table` (the entry must
/// exist). Constants are materialized here: only unsigned integers no wider
/// than a pointer are supported and become `.immediate`; everything else
/// records a TODO error via `fail`.
fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue {
    const const_inst = inst.cast(ir.Inst.Constant) orelse {
        return self.inst_table.getValue(inst).?;
    };

    if (inst.ty.zigTypeTag() != .Int) {
        return self.fail(inst.src, "TODO implement const of type '{}'", .{inst.ty});
    }

    const int_info = inst.ty.intInfo(self.module.target);
    const ptr_bits = self.module.target.cpu.arch.ptrBitWidth();
    const fits_unsigned_ptr = int_info.bits <= ptr_bits and !int_info.signed;
    if (!fits_unsigned_ptr) {
        return self.fail(inst.src, "TODO const int bigger than ptr and signed int", .{});
    }
    return MCValue{ .immediate = const_inst.val.toUnsignedInt() };
}
/// Formats an error message, appends it to `self.errors` tagged with `src`,
/// and returns `error.CodegenFail`.
///
/// The message is allocated with the allocator of `self.errors`. If
/// formatting or appending fails, `error.OutOfMemory` is returned instead and
/// the message (if it was allocated) is freed.
fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } {
    @setCold(true);
    const gpa = self.errors.allocator;
    const text = try std.fmt.allocPrint(gpa, format, args);
    self.errors.append(.{
        .byte_offset = src,
        .msg = text,
    }) catch |err| {
        // The list did not take ownership, so release the message here.
        gpa.free(text);
        return err;
    };
    return error.CodegenFail;
}
};

View File

@ -724,7 +724,16 @@ pub fn main() anyerror!void {
}
const link = @import("link.zig");
try link.updateExecutableFilePath(allocator, analyzed_module, std.fs.cwd(), "a.out");
var result = try link.updateExecutableFilePath(allocator, analyzed_module, std.fs.cwd(), "a.out");
defer result.deinit(allocator);
if (result.errors.len != 0) {
for (result.errors) |err_msg| {
const loc = findLineColumn(source, err_msg.byte_offset);
std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
}
if (debug_error_trace) return error.ParseFailure;
std.process.exit(1);
}
}
fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {

View File

@ -5,15 +5,38 @@ const Allocator = std.mem.Allocator;
const ir = @import("ir.zig");
const fs = std.fs;
const elf = std.elf;
const codegen = @import("codegen.zig");
const executable_mode = 0o755;
const default_entry_addr = 0x8000000;
/// An error message reported by the linker update, returned to callers
/// through `Result.errors`.
pub const ErrorMsg = struct {
/// Location the message refers to. Copied from the code generator's message,
/// or 0 for the "no entry point found" error.
/// Presumably a byte offset into the source text — TODO confirm.
byte_offset: usize,
/// Message text. `Result.deinit` frees it with the allocator it is given.
msg: []const u8,
};
/// Outcome of an executable update: the list of error messages collected
/// while generating and writing the file.
pub const Result = struct {
    errors: []ErrorMsg,

    /// Frees every message, then the `errors` slice itself, using `allocator`,
    /// and leaves `self` undefined.
    pub fn deinit(self: *Result, allocator: *mem.Allocator) void {
        const list = self.errors;
        for (list) |entry| allocator.free(entry.msg);
        allocator.free(list);
        self.* = undefined;
    }
};
/// Attempts incremental linking, if the file already exists.
/// If incremental linking fails, falls back to truncating the file and rewriting it.
/// A malicious file is detected as incremental link failure and does not cause Illegal Behavior.
/// This operation is not atomic.
pub fn updateExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void {
pub fn updateExecutableFilePath(
allocator: *Allocator,
module: ir.Module,
dir: fs.Dir,
sub_path: []const u8,
) !Result {
const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = executable_mode });
defer file.close();
@ -21,12 +44,18 @@ pub fn updateExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: f
}
/// Atomically overwrites the old file, if present.
pub fn writeExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs.Dir, sub_path: []const u8) !void {
pub fn writeExecutableFilePath(
allocator: *Allocator,
module: ir.Module,
dir: fs.Dir,
sub_path: []const u8,
) !Result {
const af = try dir.atomicFile(sub_path, .{ .mode = executable_mode });
defer af.deinit();
try writeExecutableFile(allocator, module, af.file);
const result = try writeExecutableFile(allocator, module, af.file);
try af.finish();
return result;
}
/// Attempts incremental linking, if the file already exists.
@ -34,8 +63,8 @@ pub fn writeExecutableFilePath(allocator: *Allocator, module: ir.Module, dir: fs
/// Returns an error if `file` is not already open with +read +write +seek abilities.
/// A malicious file is detected as incremental link failure and does not cause Illegal Behavior.
/// This operation is not atomic.
pub fn updateExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void {
updateExecutableFileInner(allocator, module, file) catch |err| switch (err) {
pub fn updateExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result {
return updateExecutableFileInner(allocator, module, file) catch |err| switch (err) {
error.IncrFailed => {
return writeExecutableFile(allocator, module, file);
},
@ -66,20 +95,17 @@ const Update = struct {
text_section_index: ?u16,
symtab_section_index: ?u16,
/// Key: index into strtab. Value: index into symbols.
symbol_table: std.AutoHashMap(usize, usize),
/// The same order as in the file
symbols: std.ArrayList(elf.Elf64_Sym),
/// Sorted by address, index into symbols
symbols_by_addr: std.ArrayList(usize),
errors: std.ArrayList(ErrorMsg),
/// Releases all memory owned by this `Update`.
///
/// Error messages still held in `self.errors` are freed as well. They are
/// allocated with `self.errors.allocator` when appended, so if the update
/// fails after recording errors (before `toOwnedSlice` hands them to the
/// caller) they would otherwise leak. After `toOwnedSlice` the list is
/// empty and the loop does nothing.
fn deinit(self: *Update) void {
    self.sections.deinit();
    self.program_headers.deinit();
    self.shstrtab.deinit();
    self.symbol_table.deinit();
    self.symbols.deinit();
    self.symbols_by_addr.deinit();
    for (self.errors.items) |err| {
        self.errors.allocator.free(err.msg);
    }
    self.errors.deinit();
}
// `expand_num / expand_den` is the factor of padding when allocation
@ -162,6 +188,7 @@ const Update = struct {
/// Appends `bytes` followed by a 0 terminator to `shstrtab` and returns the
/// offset at which the string starts, converted to `u32` with `@intCast`.
fn makeString(self: *Update, bytes: []const u8) !u32 {
    const table = &self.shstrtab;
    const start = table.items.len;
    try table.appendSlice(bytes);
    try table.append(0);
    return @intCast(u32, start);
}
@ -187,6 +214,7 @@ const Update = struct {
const file_size = 256 * 1024;
const p_align = 0x1000;
const off = self.findFreeSpace(file_size, p_align);
//std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
try self.program_headers.append(.{
.p_type = elf.PT_LOAD,
.p_offset = off,
@ -194,10 +222,10 @@ const Update = struct {
.p_vaddr = default_entry_addr,
.p_paddr = default_entry_addr,
.p_memsz = 0,
.p_align = 0x1000,
.p_align = p_align,
.p_flags = elf.PF_X | elf.PF_R,
});
self.entry_addr = default_entry_addr;
self.entry_addr = null;
phdr_load_re_dirty = true;
phdr_table_dirty = true;
}
@ -220,6 +248,7 @@ const Update = struct {
if (self.shstrtab_index == null) {
self.shstrtab_index = @intCast(u16, self.sections.items.len);
const off = self.findFreeSpace(self.shstrtab.items.len, 1);
//std.debug.warn("found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len });
try self.sections.append(.{
.sh_name = try self.makeString(".shstrtab"),
.sh_type = elf.SHT_STRTAB,
@ -259,6 +288,7 @@ const Update = struct {
const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym);
const file_size = self.module.exports.len * each_size;
const off = self.findFreeSpace(file_size, min_align);
//std.debug.warn("found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
try self.sections.append(.{
.sh_name = try self.makeString(".symtab"),
@ -307,6 +337,7 @@ const Update = struct {
const needed_size = self.program_headers.items.len * phsize;
if (needed_size > allocated_size) {
self.phdr_table_offset = null; // free the space
self.phdr_table_offset = self.findFreeSpace(needed_size, phalign);
}
@ -361,6 +392,7 @@ const Update = struct {
const needed_size = self.sections.items.len * phsize;
if (needed_size > allocated_size) {
self.shdr_table_offset = null; // free the space
self.shdr_table_offset = self.findFreeSpace(needed_size, phalign);
}
@ -414,11 +446,30 @@ const Update = struct {
},
}
}
if (shstrtab_dirty) {
try self.file.pwriteAll(self.shstrtab.items, self.sections.items[self.shstrtab_index.?].sh_offset);
}
try self.writeCodeAndSymbols();
try self.writeElfHeader();
const shstrtab_sect = &self.sections.items[self.shstrtab_index.?];
if (shstrtab_dirty or self.shstrtab.items.len != shstrtab_sect.sh_size) {
const allocated_size = self.allocatedSize(shstrtab_sect.sh_offset);
const needed_size = self.shstrtab.items.len;
if (needed_size > allocated_size) {
shstrtab_sect.sh_size = 0; // free the space
shstrtab_sect.sh_offset = self.findFreeSpace(needed_size, 1);
shstrtab_sect.sh_size = needed_size;
}
try self.file.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset);
}
if (self.entry_addr == null) {
const msg = try std.fmt.allocPrint(self.errors.allocator, "no entry point found", .{});
errdefer self.errors.allocator.free(msg);
try self.errors.append(.{
.byte_offset = 0,
.msg = msg,
});
} else {
try self.writeElfHeader();
}
// TODO find end pos and truncate
}
@ -540,13 +591,122 @@ const Update = struct {
}
/// Generates machine code for every export of the module, records one ELF
/// symbol per successfully generated export, and writes the symbol table.
///
/// The symbol list is rebuilt from scratch, starting with the mandatory null
/// symbol at index 0. Virtual addresses are assigned sequentially starting at
/// the `p_vaddr` of the program header at `phdr_load_re_index`. An export
/// named `_start` sets `self.entry_addr`.
///
/// Code generation errors are copied into `self.errors`, and the failing
/// export gets no symbol. The leftover `@panic("TODO writeCodeAndSymbols")`
/// that used to be the first statement has been removed; it made everything
/// below unreachable.
///
/// NOTE(review): the generated `code` bytes are not written to the file
/// anywhere in this function; only sizes and addresses are recorded.
fn writeCodeAndSymbols(self: *Update) !void {
    // index 0 is always a null symbol
    try self.symbols.resize(1);
    self.symbols.items[0] = .{
        .st_name = 0,
        .st_info = 0,
        .st_other = 0,
        .st_shndx = 0,
        .st_value = 0,
        .st_size = 0,
    };

    const phdr = &self.program_headers.items[self.phdr_load_re_index.?];
    var vaddr: u64 = phdr.p_vaddr;

    // Scratch buffer reused for each export's machine code.
    var code = std.ArrayList(u8).init(self.sections.allocator);
    defer code.deinit();

    for (self.module.exports) |exp| {
        code.shrink(0);
        var symbol = try codegen.generateSymbol(exp.typed_value, self.module.*, &code);
        defer symbol.deinit(code.allocator);

        if (symbol.errors.len != 0) {
            // `symbol` frees its own messages on scope exit, so duplicate
            // each one into memory owned by `self.errors`.
            for (symbol.errors) |err| {
                const msg = try mem.dupe(self.errors.allocator, u8, err.msg);
                errdefer self.errors.allocator.free(msg);
                try self.errors.append(.{
                    .byte_offset = err.byte_offset,
                    .msg = msg,
                });
            }
            continue;
        }

        if (mem.eql(u8, exp.name, "_start")) {
            self.entry_addr = vaddr;
        }
        (try self.symbols.addOne()).* = .{
            .st_name = try self.makeString(exp.name),
            .st_info = (elf.STB_LOCAL << 4) | elf.STT_FUNC,
            .st_other = 0,
            .st_shndx = self.text_section_index.?,
            .st_value = vaddr,
            .st_size = code.items.len,
        };
        vaddr += code.items.len;
    }

    return self.writeSymbols();
}
/// Serializes `self.symbols` into the symbol table section of the file.
///
/// Symbols are kept in memory as `Elf64_Sym`. For 32-bit targets each one is
/// narrowed to `Elf32_Sym` (`st_value` and `st_size` via `@intCast`). When
/// the target's endianness differs from the host's, every field is
/// byte-swapped before writing.
///
/// If the table no longer fits in the space allocated at its current offset,
/// it is moved to a new location from `findFreeSpace`. The section's
/// `sh_size` is now updated in every case, not only on relocation, so the
/// in-memory header stays in sync when the table grows within its existing
/// slack.
///
/// NOTE(review): nothing in this function writes the section header back to
/// the file — confirm the caller persists the updated `sh_offset`/`sh_size`.
///
/// Returns `error.UnsupportedArchitecture` for pointer widths other than 32
/// or 64 bits.
fn writeSymbols(self: *Update) !void {
    const ptr_width: enum { p32, p64 } = switch (self.module.target.cpu.arch.ptrBitWidth()) {
        32 => .p32,
        64 => .p64,
        else => return error.UnsupportedArchitecture,
    };
    const small_ptr = ptr_width == .p32;
    const syms_sect = &self.sections.items[self.symtab_section_index.?];
    const sym_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym);
    const sym_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym);

    const allocated_size = self.allocatedSize(syms_sect.sh_offset);
    const needed_size = self.symbols.items.len * sym_size;
    if (needed_size > allocated_size) {
        syms_sect.sh_size = 0; // free the space
        syms_sect.sh_offset = self.findFreeSpace(needed_size, sym_align);
    }
    // Always record the real size, including when the table fits in place.
    syms_sect.sh_size = needed_size;

    const allocator = self.symbols.allocator;
    const foreign_endian = self.module.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian();
    switch (ptr_width) {
        .p32 => {
            const buf = try allocator.alloc(elf.Elf32_Sym, self.symbols.items.len);
            defer allocator.free(buf);

            for (buf) |*sym, i| {
                sym.* = .{
                    .st_name = self.symbols.items[i].st_name,
                    .st_value = @intCast(u32, self.symbols.items[i].st_value),
                    .st_size = @intCast(u32, self.symbols.items[i].st_size),
                    .st_info = self.symbols.items[i].st_info,
                    .st_other = self.symbols.items[i].st_other,
                    .st_shndx = self.symbols.items[i].st_shndx,
                };
                if (foreign_endian) {
                    bswapAllFields(elf.Elf32_Sym, sym);
                }
            }
            try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset);
        },
        .p64 => {
            const buf = try allocator.alloc(elf.Elf64_Sym, self.symbols.items.len);
            defer allocator.free(buf);

            for (buf) |*sym, i| {
                sym.* = .{
                    .st_name = self.symbols.items[i].st_name,
                    .st_value = self.symbols.items[i].st_value,
                    .st_size = self.symbols.items[i].st_size,
                    .st_info = self.symbols.items[i].st_info,
                    .st_other = self.symbols.items[i].st_other,
                    .st_shndx = self.symbols.items[i].st_shndx,
                };
                if (foreign_endian) {
                    bswapAllFields(elf.Elf64_Sym, sym);
                }
            }
            try self.file.pwriteAll(mem.sliceAsBytes(buf), syms_sect.sh_offset);
        },
    }
}
};
/// Truncates the existing file contents and overwrites the contents.
/// Returns an error if `file` is not already open with +read +write +seek abilities.
pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !void {
pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.File) !Result {
var update = Update{
.file = file,
.module = &module,
@ -561,17 +721,20 @@ pub fn writeExecutableFile(allocator: *Allocator, module: ir.Module, file: fs.Fi
.text_section_index = null,
.symtab_section_index = null,
.symbol_table = std.AutoHashMap(usize, usize).init(allocator),
.symbols = std.ArrayList(elf.Elf64_Sym).init(allocator),
.symbols_by_addr = std.ArrayList(usize).init(allocator),
.errors = std.ArrayList(ErrorMsg).init(allocator),
};
defer update.deinit();
return update.perform();
try update.perform();
return Result{
.errors = update.errors.toOwnedSlice(),
};
}
/// Returns error.IncrFailed if incremental update could not be performed.
fn updateExecutableFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !void {
fn updateExecutableFileInner(allocator: *Allocator, module: ir.Module, file: fs.File) !Result {
//var ehdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 = undefined;
// TODO implement incremental linking

View File

@ -264,6 +264,56 @@ pub const Value = extern union {
}
}
/// Asserts the value is an integer and it fits in a u64
/// Asserts the value is an integer and it fits in a u64
///
/// Type values, void/noreturn/bool values, functions, references and byte
/// strings are not integers and hit `unreachable`. A negative `int_i64`
/// fails the `@intCast`, and a big integer that does not fit in a `u64`
/// hits `unreachable`.
pub fn toUnsignedInt(self: Value) u64 {
    switch (self.tag()) {
        .ty,
        .u8_type,
        .i8_type,
        .isize_type,
        .usize_type,
        .c_short_type,
        .c_ushort_type,
        .c_int_type,
        .c_uint_type,
        .c_long_type,
        .c_ulong_type,
        .c_longlong_type,
        .c_ulonglong_type,
        .c_longdouble_type,
        .f16_type,
        .f32_type,
        .f64_type,
        .f128_type,
        .c_void_type,
        .bool_type,
        .void_type,
        .type_type,
        .anyerror_type,
        .comptime_int_type,
        .comptime_float_type,
        .noreturn_type,
        .fn_naked_noreturn_no_args_type,
        .single_const_pointer_to_comptime_int_type,
        .const_slice_u8_type,
        .void_value,
        .noreturn_value,
        .bool_true,
        .bool_false,
        .function,
        .ref,
        .ref_val,
        .bytes,
        => unreachable,

        .zero => return 0,

        .int_u64 => return self.cast(Payload.Int_u64).?.int,
        // The payload for this tag is the signed one; casting to `Int_u64`
        // here (as before) selected the wrong payload type for an `int_i64`.
        .int_i64 => return @intCast(u64, self.cast(Payload.Int_i64).?.int),
        .int_big => return self.cast(Payload.IntBig).?.big_int.to(u64) catch unreachable,
    }
}
/// Asserts the value is an integer, and the destination type is ComptimeInt or Int.
pub fn intFitsInType(self: Value, ty: Type, target: Target) bool {
switch (self.tag()) {