- add call frame instruction parser

- add register printing
This commit is contained in:
kcbanner 2023-05-07 20:00:54 -04:00
parent 6c1d1aa45c
commit 8b8d627137
3 changed files with 298 additions and 2 deletions

View File

@ -14,6 +14,8 @@ pub const LANG = @import("dwarf/LANG.zig");
pub const FORM = @import("dwarf/FORM.zig");
pub const ATE = @import("dwarf/ATE.zig");
pub const EH = @import("dwarf/EH.zig");
pub const abi = @import("dwarf/abi.zig");
pub const call_frame = @import("dwarf/call_frame.zig");
pub const LLE = struct {
pub const end_of_list = 0x00;
@ -1490,7 +1492,8 @@ pub const DwarfInfo = struct {
length = try reader.readInt(u64, di.endian);
}
const entry_bytes = eh_frame[stream.pos..][0..length];
const id_len = @as(u8, if (is_64) 8 else 4);
const entry_bytes = eh_frame[stream.pos..][0..length - id_len];
const id = try reader.readInt(u32, di.endian);
// TODO: Get section_offset here (pass in from headers)
@ -1658,7 +1661,8 @@ pub const CommonInformationEntry = struct {
return false;
}
// The returned struct references memory backed by cie_bytes
// This function expects to read the CIE starting with the version field.
// The returned struct references memory backed by cie_bytes.
pub fn parse(
cie_bytes: []const u8,
section_base: u64,
@ -1775,6 +1779,7 @@ pub const FrameDescriptionEntry = struct {
aug_data: []const u8,
instructions: []const u8,
// This function expects to read the FDE starting with the PC Begin field
pub fn parse(
fde_bytes: []const u8,
section_base: u64,

54
lib/std/dwarf/abi.zig Normal file
View File

@ -0,0 +1,54 @@
const std = @import("../std.zig");
/// Fallback formatter for registers without a known name: writes the raw
/// register number to `writer` as `reg<N>`.
fn writeUnknownReg(writer: anytype, reg_number: u8) !void {
    return writer.print("reg{}", .{reg_number});
}
/// Writes a human-readable name for register number `reg_number` to `writer`.
///
/// Only the x86_64 numbering is recognised. When `arch` is null, is any other
/// architecture, or the number has no entry in the table below, the output
/// falls back to the `reg<N>` form produced by `writeUnknownReg`.
pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number: u8) !void {
    const cpu_arch = arch orelse return writeUnknownReg(writer, reg_number);
    switch (cpu_arch) {
        .x86_64 => {},
        // TODO: Add x86, aarch64
        else => return writeUnknownReg(writer, reg_number),
    }

    // Registers whose name is a fixed string.
    const fixed_name: ?[]const u8 = switch (reg_number) {
        0 => "RAX",
        1 => "RDX",
        2 => "RCX",
        3 => "RBX",
        4 => "RSI",
        5 => "RDI",
        6 => "RBP",
        7 => "RSP",
        16 => "RIP",
        49 => "RFLAGS",
        50 => "ES",
        51 => "CS",
        52 => "SS",
        53 => "DS",
        54 => "FS",
        55 => "GS",
        // 56-57 Reserved
        58 => "FS.BASE",
        59 => "GS.BASE",
        // 60-61 Reserved
        62 => "TR",
        63 => "LDTR",
        64 => "MXCSR",
        65 => "FCW",
        66 => "FSW",
        else => null,
    };
    if (fixed_name) |name| return writer.writeAll(name);

    // Numbered register banks: the printed index is the register number
    // rebased to the start of its bank.
    switch (reg_number) {
        8...15 => return writer.print("R{}", .{reg_number}),
        17...32 => return writer.print("XMM{}", .{reg_number - 17}),
        33...40 => return writer.print("ST{}", .{reg_number - 33}),
        41...48 => return writer.print("MM{}", .{reg_number - 41}),
        // 67 maps to XMM16, continuing the 17...32 bank.
        67...82 => return writer.print("XMM{}", .{reg_number - 51}),
        // 83-117 Reserved
        118...125 => return writer.print("K{}", .{reg_number - 118}),
        // 126-129 Reserved
        else => return writeUnknownReg(writer, reg_number),
    }
}

View File

@ -0,0 +1,237 @@
const std = @import("../std.zig");
const debug = std.debug;
const leb = @import("../leb128.zig");
const abi = @import("abi.zig");
const dwarf = @import("../dwarf.zig");
/// Opcode byte values of the call frame instructions.
///
/// These enum values correspond to the opcode encoding itself, with
/// the exception of the opcodes that include data in the opcode itself.
/// For those, the enum value is the opcode with the lower 6 bits (the data) masked to 0.
const Opcode = enum(u8) {
    // These are placeholders that define the range of vendor-specific opcodes.
    // They are plain declarations, not enum tags.
    const lo_user = 0x1c;
    const hi_user = 0x3f;

    // Opcodes selected by the upper two bits of the byte; the lower six bits
    // carry an operand (see how `Instruction.read` splits the byte).
    advance_loc = 0x1 << 6,
    offset = 0x2 << 6,
    restore = 0x3 << 6,

    // Opcodes that occupy the whole byte (upper two bits are zero).
    nop = 0x00,
    set_loc = 0x01,
    advance_loc1 = 0x02,
    advance_loc2 = 0x03,
    advance_loc4 = 0x04,
    offset_extended = 0x05,
    restore_extended = 0x06,
    undefined = 0x07,
    same_value = 0x08,
    register = 0x09,
    remember_state = 0x0a,
    restore_state = 0x0b,
    def_cfa = 0x0c,
    def_cfa_register = 0x0d,
    def_cfa_offset = 0x0e,
    def_cfa_expression = 0x0f,
    expression = 0x10,
    offset_extended_sf = 0x11,
    def_cfa_sf = 0x12,
    def_cfa_offset_sf = 0x13,
    val_offset = 0x14,
    val_offset_sf = 0x15,
    val_expression = 0x16,

    // Non-exhaustive: every other u8 value is still representable as an `Opcode`.
    _,
};
/// The kinds of operand a call frame instruction can carry. Each kind knows
/// the Zig type that stores its decoded value (`Storage`) and how to decode
/// itself from an instruction stream (`read`).
const Operand = enum {
    opcode_delta,
    opcode_register,
    uleb128_register,
    uleb128_offset,
    sleb128_offset,
    address,
    u8_delta,
    u16_delta,
    u32_delta,
    block,

    /// Returns the type used to hold a decoded operand of kind `self`.
    fn Storage(comptime self: Operand) type {
        return switch (self) {
            .opcode_delta, .opcode_register => u6,
            .uleb128_register => u8,
            .uleb128_offset => u64,
            .sleb128_offset => i64,
            .address => u64,
            .u8_delta => u8,
            .u16_delta => u16,
            .u32_delta => u32,
            .block => []const u8,
        };
    }

    /// Decodes one operand of kind `self`.
    ///
    /// - `reader`: source of the operand bytes. For `.block` it must be the
    ///   reader of a fixed buffer stream, because the returned slice aliases
    ///   the stream's backing buffer (`reader.context.buffer`).
    /// - `opcode_value`: the six bits embedded in the opcode byte; required
    ///   for `.opcode_delta` / `.opcode_register`, ignored otherwise.
    /// - `addr_size_bytes`: width of an `.address` operand (2, 4 or 8).
    /// - `endian`: byte order of fixed-width integers.
    ///
    /// Errors: `error.InvalidOperand` when an opcode-embedded operand is
    /// requested but `opcode_value` is null, `error.InvalidAddrSize` for an
    /// unsupported address width, `error.EndOfStream` when the input is
    /// truncated (including a block whose declared length runs past the end
    /// of the buffer), plus whatever the reader and LEB128 decoder return.
    fn read(
        comptime self: Operand,
        reader: anytype,
        opcode_value: ?u6,
        addr_size_bytes: u8,
        endian: std.builtin.Endian,
    ) !Storage(self) {
        return switch (self) {
            .opcode_delta, .opcode_register => opcode_value orelse return error.InvalidOperand,
            .uleb128_register => try leb.readULEB128(u8, reader),
            .uleb128_offset => try leb.readULEB128(u64, reader),
            .sleb128_offset => try leb.readILEB128(i64, reader),
            // Narrower addresses are widened to the u64 storage type.
            .address => switch (addr_size_bytes) {
                2 => try reader.readInt(u16, endian),
                4 => try reader.readInt(u32, endian),
                8 => try reader.readInt(u64, endian),
                else => return error.InvalidAddrSize,
            },
            .u8_delta => try reader.readByte(),
            .u16_delta => try reader.readInt(u16, endian),
            .u32_delta => try reader.readInt(u32, endian),
            .block => {
                // Decode the length as usize so it is a valid slice bound on
                // every target; a length that does not fit is reported by the
                // LEB128 decoder as an error.
                const block_len = try leb.readULEB128(usize, reader);
                // TODO: This feels like a kludge, change to FixedBufferStream param?
                const stream = reader.context;
                const remaining = stream.buffer[stream.pos..];
                // The length comes from the input: reject a block that claims
                // more bytes than are left instead of slicing out of bounds.
                if (block_len > remaining.len) return error.EndOfStream;
                stream.pos += block_len;
                return remaining[0..block_len];
            },
        };
    }
};
/// Builds the payload type for one call frame instruction.
///
/// `definition` is an anonymous struct literal whose field names become the
/// operand names and whose values name an `Operand` kind, for example
/// `.{ .register = .uleb128_register, .offset = .uleb128_offset }`.
///
/// The returned struct has a single field, `operands`, holding one member per
/// definition field (typed by `Operand.Storage`), and a `read` function that
/// decodes those members from a reader in definition order.
fn InstructionType(comptime definition: anytype) type {
    const definition_info = @typeInfo(@TypeOf(definition));
    debug.assert(definition_info == .Struct);

    const definition_fields = definition_info.Struct.fields;

    // Describe one struct field per operand, named after the definition field.
    comptime var operand_fields: [definition_fields.len]std.builtin.Type.StructField = undefined;
    inline for (definition_fields, 0..) |definition_field, index| {
        const operand = std.enums.nameCast(Operand, @field(definition, definition_field.name));
        const StorageType = operand.Storage();
        operand_fields[index] = .{
            .name = definition_field.name,
            .type = StorageType,
            .default_value = null,
            .is_comptime = false,
            .alignment = @alignOf(StorageType),
        };
    }

    const InstructionOperands = @Type(.{
        .Struct = .{
            .layout = .Auto,
            .fields = &operand_fields,
            .decls = &.{},
            .is_tuple = false,
        },
    });

    return struct {
        const Self = @This();

        operands: InstructionOperands,

        /// Decodes every operand of this instruction from `reader`, in the
        /// order the operands appear in the definition. `opcode_value`,
        /// `addr_size_bytes` and `endian` are forwarded to `Operand.read`.
        pub fn read(reader: anytype, opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian) !Self {
            var decoded: InstructionOperands = undefined;
            inline for (definition_fields) |definition_field| {
                const operand = comptime std.enums.nameCast(Operand, @field(definition, definition_field.name));
                @field(decoded, definition_field.name) = try operand.read(reader, opcode_value, addr_size_bytes, endian);
            }
            return .{ .operands = decoded };
        }
    };
}
/// A decoded call frame instruction: one variant per `Opcode` tag, each
/// carrying the operands described by its `InstructionType` definition.
pub const Instruction = union(Opcode) {
    // Opcodes whose first operand is embedded in the low six bits of the opcode byte.
    advance_loc: InstructionType(.{ .delta = .opcode_delta }),
    offset: InstructionType(.{ .register = .opcode_register, .offset = .uleb128_offset }),
    restore: InstructionType(.{ .register = .opcode_register }),

    // Opcodes that occupy the whole byte; all operands follow in the stream.
    nop: InstructionType(.{}),
    set_loc: InstructionType(.{ .address = .address }),
    advance_loc1: InstructionType(.{ .delta = .u8_delta }),
    advance_loc2: InstructionType(.{ .delta = .u16_delta }),
    advance_loc4: InstructionType(.{ .delta = .u32_delta }),
    offset_extended: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }),
    restore_extended: InstructionType(.{ .register = .uleb128_register }),
    undefined: InstructionType(.{ .register = .uleb128_register }),
    same_value: InstructionType(.{ .register = .uleb128_register }),
    register: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }),
    remember_state: InstructionType(.{}),
    restore_state: InstructionType(.{}),
    def_cfa: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }),
    def_cfa_register: InstructionType(.{ .register = .uleb128_register }),
    def_cfa_offset: InstructionType(.{ .offset = .uleb128_offset }),
    def_cfa_expression: InstructionType(.{ .block = .block }),
    expression: InstructionType(.{ .register = .uleb128_register, .block = .block }),
    offset_extended_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }),
    def_cfa_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }),
    def_cfa_offset_sf: InstructionType(.{ .offset = .sleb128_offset }),
    val_offset: InstructionType(.{ .a = .uleb128_offset, .b = .uleb128_offset }),
    val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }),
    val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }),

    /// Decodes a single instruction (opcode byte plus operands) from `reader`.
    ///
    /// `addr_size_bytes` and `endian` are forwarded to the operand decoders.
    /// Returns `error.UnknownOpcode` when the opcode byte matches no `Opcode`
    /// tag; operand decoding errors are propagated unchanged.
    pub fn read(reader: anytype, addr_size_bytes: u8, endian: std.builtin.Endian) !Instruction {
        const opcode = try reader.readByte();
        const high_bits = opcode & 0b11000000;

        switch (high_bits) {
            // The upper two bits select the instruction; the lower six are its first operand.
            inline @enumToInt(Opcode.advance_loc), @enumToInt(Opcode.offset), @enumToInt(Opcode.restore) => |tag_value| {
                const tag = comptime @intToEnum(Opcode, tag_value);
                const Payload = std.meta.TagPayload(Instruction, tag);
                const embedded = @intCast(u6, opcode & 0b111111);
                return @unionInit(
                    Instruction,
                    @tagName(tag),
                    try Payload.read(reader, embedded, addr_size_bytes, endian),
                );
            },
            // Upper bits clear: the whole byte is the opcode, so look it up by value.
            0 => {
                inline for (@typeInfo(Opcode).Enum.fields) |field| {
                    if (field.value == opcode) {
                        const tag = comptime @intToEnum(Opcode, field.value);
                        const Payload = std.meta.TagPayload(Instruction, tag);
                        return @unionInit(
                            Instruction,
                            @tagName(tag),
                            try Payload.read(reader, null, addr_size_bytes, endian),
                        );
                    }
                }
                return error.UnknownOpcode;
            },
            else => return error.UnknownOpcode,
        }
    }

    /// Writes a textual rendering of this instruction's operands to `writer`.
    ///
    /// `cie` supplies the code and data alignment factors used to scale
    /// deltas and offsets; `arch` selects the register naming scheme.
    /// Variants without a rendering yet write nothing.
    pub fn writeOperands(self: Instruction, writer: anytype, cie: dwarf.CommonInformationEntry, arch: ?std.Target.Cpu.Arch) !void {
        switch (self) {
            inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| {
                try writer.print("{}", .{i.operands.delta * cie.code_alignment_factor});
            },
            .offset => |i| {
                try abi.writeRegisterName(writer, arch, i.operands.register);
                try writer.print(" {}", .{@intCast(i64, i.operands.offset) * cie.data_alignment_factor});
            },
            .def_cfa => |i| {
                try abi.writeRegisterName(writer, arch, i.operands.register);
                try writer.print(" +{}", .{i.operands.offset});
            },
            .def_cfa_expression => |i| {
                try writer.print("TODO parse expressions: {x}", .{std.fmt.fmtSliceHexLower(i.operands.block)});
            },

            // No operand rendering for these variants.
            .restore,
            .nop,
            .set_loc,
            .offset_extended,
            .restore_extended,
            .undefined,
            .same_value,
            .register,
            .remember_state,
            .restore_state,
            .def_cfa_register,
            .def_cfa_offset,
            .expression,
            .offset_extended_sf,
            .def_cfa_sf,
            .def_cfa_offset_sf,
            .val_offset,
            .val_offset_sf,
            .val_expression,
            => {},
        }
    }
};