From 8b8d6271371c66eb703bdbe6f13e0426c4c2436f Mon Sep 17 00:00:00 2001
From: kcbanner
Date: Sun, 7 May 2023 20:00:54 -0400
Subject: [PATCH] - add call frame instruction parser - add register printing

---
 lib/std/dwarf.zig            |   9 +-
 lib/std/dwarf/abi.zig        |  58 ++++++++
 lib/std/dwarf/call_frame.zig | 261 +++++++++++++++++++++++++++++++++++
 3 files changed, 326 insertions(+), 2 deletions(-)
 create mode 100644 lib/std/dwarf/abi.zig
 create mode 100644 lib/std/dwarf/call_frame.zig

diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig
index 16baa84c28..144c12470e 100644
--- a/lib/std/dwarf.zig
+++ b/lib/std/dwarf.zig
@@ -14,6 +14,8 @@ pub const LANG = @import("dwarf/LANG.zig");
 pub const FORM = @import("dwarf/FORM.zig");
 pub const ATE = @import("dwarf/ATE.zig");
 pub const EH = @import("dwarf/EH.zig");
+pub const abi = @import("dwarf/abi.zig");
+pub const call_frame = @import("dwarf/call_frame.zig");
 
 pub const LLE = struct {
     pub const end_of_list = 0x00;
@@ -1490,7 +1492,8 @@ pub const DwarfInfo = struct {
                     length = try reader.readInt(u64, di.endian);
                 }
 
-                const entry_bytes = eh_frame[stream.pos..][0..length];
+                const id_len = @as(u8, if (is_64) 8 else 4);
+                const entry_bytes = eh_frame[stream.pos..][0..length - id_len];
                 const id = try reader.readInt(u32, di.endian);
 
                 // TODO: Get section_offset here (pass in from headers)
@@ -1658,7 +1661,8 @@ pub const CommonInformationEntry = struct {
         return false;
     }
 
-    // The returned struct references memory backed by cie_bytes
+    // This function expects to read the CIE starting with the version field.
+    // The returned struct references memory backed by cie_bytes.
     pub fn parse(
         cie_bytes: []const u8,
         section_base: u64,
@@ -1775,6 +1779,7 @@ pub const FrameDescriptionEntry = struct {
     aug_data: []const u8,
     instructions: []const u8,
 
+    // This function expects to read the FDE starting with the PC Begin field
     pub fn parse(
         fde_bytes: []const u8,
         section_base: u64,
diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig
new file mode 100644
index 0000000000..9ffb908b73
--- /dev/null
+++ b/lib/std/dwarf/abi.zig
@@ -0,0 +1,58 @@
+const std = @import("../std.zig");
+
+/// Writes the generic name `reg<N>` for a register number that has no specific name.
+fn writeUnknownReg(writer: anytype, reg_number: u8) !void {
+    try writer.print("reg{}", .{ reg_number });
+}
+
+/// Writes a name for register number `reg_number` to `writer`.
+/// Only `.x86_64` has named registers here; any other `arch` (including `null`) and any
+/// register number not listed below is written in the generic `reg<N>` form.
+pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number: u8) !void {
+    if (arch) |a| {
+        switch (a) {
+            .x86_64 => {
+                switch (reg_number) {
+                    0 => try writer.writeAll("RAX"),
+                    1 => try writer.writeAll("RDX"),
+                    2 => try writer.writeAll("RCX"),
+                    3 => try writer.writeAll("RBX"),
+                    4 => try writer.writeAll("RSI"),
+                    5 => try writer.writeAll("RDI"),
+                    6 => try writer.writeAll("RBP"),
+                    7 => try writer.writeAll("RSP"),
+                    8...15 => try writer.print("R{}", .{ reg_number }),
+                    16 => try writer.writeAll("RIP"),
+                    17...32 => try writer.print("XMM{}", .{ reg_number - 17 }),
+                    33...40 => try writer.print("ST{}", .{ reg_number - 33 }),
+                    41...48 => try writer.print("MM{}", .{ reg_number - 41 }),
+                    49 => try writer.writeAll("RFLAGS"),
+                    50 => try writer.writeAll("ES"),
+                    51 => try writer.writeAll("CS"),
+                    52 => try writer.writeAll("SS"),
+                    53 => try writer.writeAll("DS"),
+                    54 => try writer.writeAll("FS"),
+                    55 => try writer.writeAll("GS"),
+                    // 56-57 Reserved
+                    58 => try writer.writeAll("FS.BASE"),
+                    59 => try writer.writeAll("GS.BASE"),
+                    // 60-61 Reserved
+                    62 => try writer.writeAll("TR"),
+                    63 => try writer.writeAll("LDTR"),
+                    64 => try writer.writeAll("MXCSR"),
+                    65 => try writer.writeAll("FCW"),
+                    66 => try writer.writeAll("FSW"),
+                    67...82 => try writer.print("XMM{}", .{ reg_number - 51 }),
+                    // 83-117 Reserved
+                    118...125 => try writer.print("K{}", .{ reg_number - 118 }),
+                    // 126-129 Reserved
+                    else => try writeUnknownReg(writer, reg_number),
+                }
+            },
+
+            // TODO: Add x86, aarch64
+
+            else => try writeUnknownReg(writer, reg_number),
+        }
+    } else try writeUnknownReg(writer, reg_number);
+}
diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig
new file mode 100644
index 0000000000..34bd82f7c9
--- /dev/null
+++ b/lib/std/dwarf/call_frame.zig
@@ -0,0 +1,261 @@
+const std = @import("../std.zig");
+const debug = std.debug;
+const leb = @import("../leb128.zig");
+const abi = @import("abi.zig");
+const dwarf = @import("../dwarf.zig");
+
+// These enum values correspond to the opcode encoding itself, with
+// the exception of the opcodes that include data in the opcode itself.
+// For those, the enum value is the opcode with the lower 6 bits (the data) masked to 0.
+const Opcode = enum(u8) {
+    // These are placeholders that define the range of vendor-specific opcodes
+    const lo_user = 0x1c;
+    const hi_user = 0x3f;
+
+    advance_loc = 0x1 << 6,
+    offset = 0x2 << 6,
+    restore = 0x3 << 6,
+    nop = 0x00,
+    set_loc = 0x01,
+    advance_loc1 = 0x02,
+    advance_loc2 = 0x03,
+    advance_loc4 = 0x04,
+    offset_extended = 0x05,
+    restore_extended = 0x06,
+    undefined = 0x07,
+    same_value = 0x08,
+    register = 0x09,
+    remember_state = 0x0a,
+    restore_state = 0x0b,
+    def_cfa = 0x0c,
+    def_cfa_register = 0x0d,
+    def_cfa_offset = 0x0e,
+    def_cfa_expression = 0x0f,
+    expression = 0x10,
+    offset_extended_sf = 0x11,
+    def_cfa_sf = 0x12,
+    def_cfa_offset_sf = 0x13,
+    val_offset = 0x14,
+    val_offset_sf = 0x15,
+    val_expression = 0x16,
+
+    _,
+};
+
+/// The encodings an instruction operand can have. `Storage` gives the type a decoded
+/// operand is held in, and `read` decodes one operand of the given encoding.
+const Operand = enum {
+    opcode_delta,
+    opcode_register,
+    uleb128_register,
+    uleb128_offset,
+    sleb128_offset,
+    address,
+    u8_delta,
+    u16_delta,
+    u32_delta,
+    block,
+
+    /// Returns the type used to hold a decoded operand of this kind.
+    fn Storage(comptime self: Operand) type {
+        return switch (self) {
+            .opcode_delta, .opcode_register => u6,
+            .uleb128_register => u8,
+            .uleb128_offset => u64,
+            .sleb128_offset => i64,
+            .address => u64,
+            .u8_delta => u8,
+            .u16_delta => u16,
+            .u32_delta => u32,
+            .block => []const u8,
+        };
+    }
+
+    /// Decodes one operand of this kind.
+    /// `opcode_delta` and `opcode_register` take their value from `opcode_value` and fail
+    /// with `error.InvalidOperand` when it is null. All other kinds are read from `reader`.
+    /// `address` is `addr_size_bytes` wide (2, 4 or 8, otherwise `error.InvalidAddrSize`).
+    /// `block` is a ULEB128 length followed by that many bytes; it is returned as a slice of
+    /// `reader.context.buffer`, so `reader` must expose `context.buffer` and `context.pos`.
+    fn read(
+        comptime self: Operand,
+        reader: anytype,
+        opcode_value: ?u6,
+        addr_size_bytes: u8,
+        endian: std.builtin.Endian,
+    ) !Storage(self) {
+        return switch (self) {
+            .opcode_delta, .opcode_register => opcode_value orelse return error.InvalidOperand,
+            .uleb128_register => try leb.readULEB128(u8, reader),
+            .uleb128_offset => try leb.readULEB128(u64, reader),
+            .sleb128_offset => try leb.readILEB128(i64, reader),
+            .address => switch (addr_size_bytes) {
+                2 => try reader.readInt(u16, endian),
+                4 => try reader.readInt(u32, endian),
+                8 => try reader.readInt(u64, endian),
+                else => return error.InvalidAddrSize,
+            },
+            .u8_delta => try reader.readByte(),
+            .u16_delta => try reader.readInt(u16, endian),
+            .u32_delta => try reader.readInt(u32, endian),
+            .block => {
+                const block_len = try leb.readULEB128(usize, reader);
+
+                // TODO: This feels like a kludge, change to FixedBufferStream param?
+                // The length is read from the input, so it is checked against the bytes
+                // left in the backing buffer before slicing.
+                const remaining = reader.context.buffer[reader.context.pos..];
+                if (block_len > remaining.len) return error.InvalidOperand;
+                const block = remaining[0..block_len];
+                reader.context.pos += block_len;
+
+                return block;
+            },
+        };
+    }
+};
+
+/// Builds the payload type for one instruction from `definition`, a struct literal mapping
+/// operand names to `Operand` tags. The result has an `operands` field holding one decoded
+/// value per entry and a `read` function that decodes them in declaration order.
+fn InstructionType(comptime definition: anytype) type {
+    const definition_type = @typeInfo(@TypeOf(definition));
+    debug.assert(definition_type == .Struct);
+
+    const definition_len = definition_type.Struct.fields.len;
+    comptime var fields: [definition_len]std.builtin.Type.StructField = undefined;
+    inline for (definition_type.Struct.fields, &fields) |definition_field, *operands_field| {
+        const opcode = std.enums.nameCast(Operand, @field(definition, definition_field.name));
+        const storage_type = opcode.Storage();
+        operands_field.* = .{
+            .name = definition_field.name,
+            .type = storage_type,
+            .default_value = null,
+            .is_comptime = false,
+            .alignment = @alignOf(storage_type),
+        };
+    }
+
+    const InstructionOperands = @Type(.{
+        .Struct = .{
+            .layout = .Auto,
+            .fields = &fields,
+            .decls = &.{},
+            .is_tuple = false,
+        },
+    });
+
+    return struct {
+        const Self = @This();
+        operands: InstructionOperands,
+
+        pub fn read(reader: anytype, opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian) !Self {
+            var operands: InstructionOperands = undefined;
+            inline for (definition_type.Struct.fields) |definition_field| {
+                const operand = comptime std.enums.nameCast(Operand, @field(definition, definition_field.name));
+                @field(operands, definition_field.name) = try operand.read(reader, opcode_value, addr_size_bytes, endian);
+            }
+
+            return .{ .operands = operands };
+        }
+    };
+}
+
+/// A decoded call frame instruction: the opcode together with its operands.
+pub const Instruction = union(Opcode) {
+    advance_loc: InstructionType(.{ .delta = .opcode_delta }),
+    offset: InstructionType(.{ .register = .opcode_register, .offset = .uleb128_offset }),
+    restore: InstructionType(.{ .register = .opcode_register }),
+    nop: InstructionType(.{}),
+    set_loc: InstructionType(.{ .address = .address }),
+    advance_loc1: InstructionType(.{ .delta = .u8_delta }),
+    advance_loc2: InstructionType(.{ .delta = .u16_delta }),
+    advance_loc4: InstructionType(.{ .delta = .u32_delta }),
+    offset_extended: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }),
+    restore_extended: InstructionType(.{ .register = .uleb128_register }),
+    undefined: InstructionType(.{ .register = .uleb128_register }),
+    same_value: InstructionType(.{ .register = .uleb128_register }),
+    register: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }),
+    remember_state: InstructionType(.{}),
+    restore_state: InstructionType(.{}),
+    def_cfa: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }),
+    def_cfa_register: InstructionType(.{ .register = .uleb128_register }),
+    def_cfa_offset: InstructionType(.{ .offset = .uleb128_offset }),
+    def_cfa_expression: InstructionType(.{ .block = .block }),
+    expression: InstructionType(.{ .register = .uleb128_register, .block = .block }),
+    offset_extended_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }),
+    def_cfa_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }),
+    def_cfa_offset_sf: InstructionType(.{ .offset = .sleb128_offset }),
+    val_offset: InstructionType(.{ .a = .uleb128_offset, .b = .uleb128_offset }),
+    val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }),
+    val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }),
+
+    /// Decodes a single instruction from `reader`.
+    /// If the top two bits of the opcode byte are non-zero they select `advance_loc`, `offset`
+    /// or `restore`, and the low six bits of the same byte are passed on as the opcode data.
+    /// Otherwise the whole byte is matched against `Opcode`; no match is `error.UnknownOpcode`.
+    pub fn read(reader: anytype, addr_size_bytes: u8, endian: std.builtin.Endian) !Instruction {
+        const opcode = try reader.readByte();
+        const upper = opcode & 0b11000000;
+        return switch (upper) {
+            inline @enumToInt(Opcode.advance_loc), @enumToInt(Opcode.offset), @enumToInt(Opcode.restore) => |u| @unionInit(
+                Instruction,
+                @tagName(@intToEnum(Opcode, u)),
+                try std.meta.TagPayload(Instruction, @intToEnum(Opcode, u)).read(reader, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian),
+            ),
+            0 => blk: {
+                inline for (@typeInfo(Opcode).Enum.fields) |field| {
+                    if (field.value == opcode) {
+                        break :blk @unionInit(
+                            Instruction,
+                            @tagName(@intToEnum(Opcode, field.value)),
+                            try std.meta.TagPayload(Instruction, @intToEnum(Opcode, field.value)).read(reader, null, addr_size_bytes, endian),
+                        );
+                    }
+                }
+                break :blk error.UnknownOpcode;
+            },
+            else => error.UnknownOpcode,
+        };
+    }
+
+    /// Writes a human readable form of this instruction's operands to `writer`.
+    /// Only the `advance_loc*`, `offset`, `def_cfa` and `def_cfa_expression` instructions
+    /// produce output so far; every other instruction writes nothing.
+    pub fn writeOperands(self: Instruction, writer: anytype, cie: dwarf.CommonInformationEntry, arch: ?std.Target.Cpu.Arch) !void {
+        switch (self) {
+            inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{ i.operands.delta * cie.code_alignment_factor }),
+            .offset => |i| {
+                try abi.writeRegisterName(writer, arch, i.operands.register);
+                try writer.print(" {}", .{ @intCast(i64, i.operands.offset) * cie.data_alignment_factor });
+            },
+            .restore => {},
+            .nop => {},
+            .set_loc => {},
+            .offset_extended => {},
+            .restore_extended => {},
+            .undefined => {},
+            .same_value => {},
+            .register => {},
+            .remember_state => {},
+            .restore_state => {},
+            .def_cfa => |i| {
+                try abi.writeRegisterName(writer, arch, i.operands.register);
+                try writer.print(" +{}", .{ i.operands.offset });
+            },
+            .def_cfa_register => {},
+            .def_cfa_offset => {},
+            .def_cfa_expression => |i| {
+                try writer.print("TODO parse expressions: {x}", .{ std.fmt.fmtSliceHexLower(i.operands.block) });
+            },
+            .expression => {},
+            .offset_extended_sf => {},
+            .def_cfa_sf => {},
+            .def_cfa_offset_sf => {},
+            .val_offset => {},
+            .val_offset_sf => {},
+            .val_expression => {},
+        }
+    }
+
+};