diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 9ffb908b73..4d73a5d23e 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -1,7 +1,7 @@ const std = @import("../std.zig"); fn writeUnknownReg(writer: anytype, reg_number: u8) !void { - try writer.print("reg{}", .{ reg_number }); + try writer.print("reg{}", .{reg_number}); } pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number: u8) !void { @@ -17,11 +17,11 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number 5 => try writer.writeAll("RDI"), 6 => try writer.writeAll("RBP"), 7 => try writer.writeAll("RSP"), - 8...15 => try writer.print("R{}", .{ reg_number }), + 8...15 => try writer.print("R{}", .{reg_number}), 16 => try writer.writeAll("RIP"), - 17...32 => try writer.print("XMM{}", .{ reg_number - 17 }), - 33...40 => try writer.print("ST{}", .{ reg_number - 33 }), - 41...48 => try writer.print("MM{}", .{ reg_number - 41 }), + 17...32 => try writer.print("XMM{}", .{reg_number - 17}), + 33...40 => try writer.print("ST{}", .{reg_number - 33}), + 41...48 => try writer.print("MM{}", .{reg_number - 41}), 49 => try writer.writeAll("RFLAGS"), 50 => try writer.writeAll("ES"), 51 => try writer.writeAll("CS"), @@ -38,9 +38,9 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number 64 => try writer.writeAll("MXCSR"), 65 => try writer.writeAll("FCW"), 66 => try writer.writeAll("FSW"), - 67...82 => try writer.print("XMM{}", .{ reg_number - 51 }), + 67...82 => try writer.print("XMM{}", .{reg_number - 51}), // 83-117 Reserved - 118...125 => try writer.print("K{}", .{ reg_number - 118 }), + 118...125 => try writer.print("K{}", .{reg_number - 118}), // 126-129 Reserved else => try writeUnknownReg(writer, reg_number), } @@ -52,3 +52,23 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number } } else try writeUnknownReg(writer, reg_number); } + +const FormatRegisterData = struct { + reg_number: u8, + arch: ?std.Target.Cpu.Arch, +}; + +pub fn formatRegister( + data: FormatRegisterData, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = fmt; + _ = options; + try writeRegisterName(writer, data.arch, data.reg_number); +} + +pub fn fmtRegister(reg_number: u8, arch: ?std.Target.Cpu.Arch) std.fmt.Formatter(formatRegister) { + return .{ .data = .{ .reg_number = reg_number, .arch = arch } }; +} diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 467b5061b1..a40094d608 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -3,18 +3,13 @@ const debug = std.debug; const leb = @import("../leb128.zig"); const abi = @import("abi.zig"); const dwarf = @import("../dwarf.zig"); +const expressions = @import("expressions.zig"); -// These enum values correspond to the opcode encoding itself, with -// the exception of the opcodes that include data in the opcode itself. -// For those, the enum value is the opcode with the lower 6 bits (the data) masked to 0. const Opcode = enum(u8) { - // These are placeholders that define the range of vendor-specific opcodes - const lo_user = 0x1c; - const hi_user = 0x3f; - advance_loc = 0x1 << 6, offset = 0x2 << 6, restore = 0x3 << 6, + nop = 0x00, set_loc = 0x01, advance_loc1 = 0x02, @@ -39,7 +34,17 @@ const Opcode = enum(u8) { val_offset_sf = 0x15, val_expression = 0x16, - _, + // These opcodes encode an operand in the lower 6 bits of the opcode itself + pub const lo_inline = Opcode.advance_loc; + pub const hi_inline = Opcode.restore; + + // These opcodes are trailed by zero or more operands + pub const lo_reserved = Opcode.nop; + pub const hi_reserved = Opcode.val_expression; + + // Vendor-specific opcodes + pub const lo_user = 0x1c; + pub const hi_user = 0x3f; }; const Operand = enum { @@ -70,11 +75,12 @@ const Operand = enum { fn read( comptime self: Operand, - reader: anytype, + stream: *std.io.FixedBufferStream([]const u8), opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian, ) !Storage(self) { + const reader = stream.reader(); return switch (self) { .opcode_delta, .opcode_register => opcode_value orelse return error.InvalidOperand, .uleb128_register => try leb.readULEB128(u8, reader), @@ -91,13 +97,13 @@ const Operand = enum { .u32_delta => try reader.readInt(u32, endian), .block => { const block_len = try leb.readULEB128(u64, reader); + if (stream.pos + block_len > stream.buffer.len) return error.InvalidOperand; - // TODO: This feels like a kludge, change to FixedBufferStream param? - const block = reader.context.buffer[reader.context.pos..][0..block_len]; + const block = stream.buffer[stream.pos..][0..block_len]; reader.context.pos += block_len; return block; - } + }, }; } }; @@ -133,11 +139,16 @@ fn InstructionType(comptime definition: anytype) type { const Self = @This(); operands: InstructionOperands, - pub fn read(reader: anytype, opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian) !Self { + pub fn read( + stream: *std.io.FixedBufferStream([]const u8), + opcode_value: ?u6, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Self { var operands: InstructionOperands = undefined; inline for (definition_type.Struct.fields) |definition_field| { const operand = comptime std.enums.nameCast(Operand, @field(definition, definition_field.name)); - @field(operands, definition_field.name) = try operand.read(reader, opcode_value, addr_size_bytes, endian); + @field(operands, definition_field.name) = try operand.read(stream, opcode_value, addr_size_bytes, endian); } return .{ .operands = operands }; @@ -173,37 +184,44 @@ pub const Instruction = union(Opcode) { val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }), val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }), - pub fn read(reader: anytype, addr_size_bytes: u8, endian: std.builtin.Endian) !Instruction { - const opcode = try reader.readByte(); - const upper = opcode & 0b11000000; - return switch (upper) { - inline @enumToInt(Opcode.advance_loc), @enumToInt(Opcode.offset), @enumToInt(Opcode.restore) => |u| @unionInit( - Instruction, - @tagName(@intToEnum(Opcode, u)), - try std.meta.TagPayload(Instruction, @intToEnum(Opcode, u)).read(reader, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian), - ), - 0 => blk: { - inline for (@typeInfo(Opcode).Enum.fields) |field| { - if (field.value == opcode) { - break :blk @unionInit( - Instruction, - @tagName(@intToEnum(Opcode, field.value)), - try std.meta.TagPayload(Instruction, @intToEnum(Opcode, field.value)).read(reader, null, addr_size_bytes, endian), - ); - } - } - break :blk error.UnknownOpcode; + pub fn read( + stream: *std.io.FixedBufferStream([]const u8), + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Instruction { + @setEvalBranchQuota(1800); + + return switch (try stream.reader().readByte()) { + inline @enumToInt(Opcode.lo_inline)...@enumToInt(Opcode.hi_inline) => |opcode| blk: { + const e = @intToEnum(Opcode, opcode & 0b11000000); + const payload_type = std.meta.TagPayload(Instruction, e); + const value = try payload_type.read(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian); + break :blk @unionInit(Instruction, @tagName(e), value); }, - else => error.UnknownOpcode, + inline @enumToInt(Opcode.lo_reserved)...@enumToInt(Opcode.hi_reserved) => |opcode| blk: { + const e = @intToEnum(Opcode, opcode); + const payload_type = std.meta.TagPayload(Instruction, e); + const value = try payload_type.read(stream, null, addr_size_bytes, endian); + break :blk @unionInit(Instruction, @tagName(e), value); + }, + Opcode.lo_user...Opcode.hi_user => error.UnimplementedUserOpcode, + else => error.InvalidOpcode, }; } - pub fn writeOperands(self: Instruction, writer: anytype, cie: dwarf.CommonInformationEntry, arch: ?std.Target.Cpu.Arch) !void { + pub fn writeOperands( + self: Instruction, + writer: anytype, + cie: dwarf.CommonInformationEntry, + arch: ?std.Target.Cpu.Arch, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !void { switch (self) { - inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{ i.operands.delta * cie.code_alignment_factor }), + inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{i.operands.delta * cie.code_alignment_factor}), .offset => |i| { try abi.writeRegisterName(writer, arch, i.operands.register); - try writer.print(" {}", .{ @intCast(i64, i.operands.offset) * cie.data_alignment_factor }); + try writer.print(" {}", .{@intCast(i64, i.operands.offset) * cie.data_alignment_factor}); }, .restore => {}, .nop => {}, @@ -217,14 +235,14 @@ pub const Instruction = union(Opcode) { .restore_state => {}, .def_cfa => |i| { try abi.writeRegisterName(writer, arch, i.operands.register); - try writer.print(" {}", .{ fmtOffset(@intCast(i64, i.operands.offset)) }); + try writer.print(" {d:<1}", .{@intCast(i64, i.operands.offset)}); }, .def_cfa_register => {}, .def_cfa_offset => |i| { - try writer.print("{}", .{ fmtOffset(@intCast(i64, i.operands.offset)) }); + try writer.print("{d:<1}", .{@intCast(i64, i.operands.offset)}); }, .def_cfa_expression => |i| { - try writer.print("TODO(parse expressions data {x})", .{ std.fmt.fmtSliceHexLower(i.operands.block) }); + try writeExpression(writer, i.operands.block, arch, addr_size_bytes, endian); }, .expression => {}, .offset_extended_sf => {}, @@ -235,23 +253,83 @@ pub const Instruction = union(Opcode) { .val_expression => {}, } } - }; +fn writeExpression( + writer: anytype, + block: []const u8, + arch: ?std.Target.Cpu.Arch, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !void { + var stream = std.io.fixedBufferStream(block); -fn formatOffset(data: i64, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - if (data >= 0) try writer.writeByte('+'); - return std.fmt.formatInt(data, 10, .lower, options, writer); + // Generate a lookup table from opcode value to name + const opcode_lut_len = 256; + const opcode_lut: [opcode_lut_len]?[]const u8 = comptime blk: { + var lut: [opcode_lut_len]?[]const u8 = [_]?[]const u8{null} ** opcode_lut_len; + for (@typeInfo(dwarf.OP).Struct.decls) |decl| { + lut[@as(u8, @field(dwarf.OP, decl.name))] = decl.name; + } + + break :blk lut; + }; + + switch (endian) { + inline .Little, .Big => |e| { + switch (addr_size_bytes) { + inline 2, 4, 8 => |size| { + const StackMachine = expressions.StackMachine(.{ + .addr_size = size, + .endian = e, + .call_frame_mode = true, + }); + + const reader = stream.reader(); + while (stream.pos < stream.buffer.len) { + if (stream.pos > 0) try writer.writeAll(", "); + + const opcode = try reader.readByte(); + if (opcode_lut[opcode]) |opcode_name| { + try writer.print("DW_OP_{s}", .{opcode_name}); + } else { + // TODO: See how llvm-dwarfdump prints these? + if (opcode >= dwarf.OP.lo_user and opcode <= dwarf.OP.lo_user) { + try writer.print("", .{opcode}); + } else { + try writer.print("", .{opcode}); + } + } + + if (try StackMachine.readOperand(&stream, opcode)) |value| { + switch (value) { + //.generic => |v| try writer.print("{d}", .{v}), + .generic => {}, // Constant values are implied by the opcode name + .register => |v| try writer.print(" {}", .{ abi.fmtRegister(v, arch) }), + .base_register => |v| try writer.print(" {}{d:<1}", .{ abi.fmtRegister(v.base_register, arch), v.offset }), + else => try writer.print(" TODO({s})", .{@tagName(value)}), + } + } + } + }, + else => return error.InvalidAddrSize, + } + }, + } } -fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) { - return .{ .data = offset }; -} +// fn formatOffset(data: i64, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { +// _ = fmt; +// if (data >= 0) try writer.writeByte('+'); +// return std.fmt.formatInt(data, 10, .lower, options, writer); +// } + +// fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) { +// return .{ .data = offset }; +// } /// See section 6.4.1 of the DWARF5 specification pub const VirtualMachine = struct { - const RegisterRule = union(enum) { undefined: void, same_value: void, @@ -263,11 +341,18 @@ pub const VirtualMachine = struct { architectural: void, }; - const Column = struct { + pub const Column = struct { register: u8 = undefined, rule: RegisterRule = .{ .undefined = {} }, - pub fn writeRule(self: Column, writer: anytype, is_cfa: bool, arch: ?std.Target.Cpu.Arch) !void { + pub fn writeRule( + self: Column, + writer: anytype, + is_cfa: bool, + arch: ?std.Target.Cpu.Arch, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !void { if (is_cfa) { try writer.writeAll("CFA"); } else { @@ -281,48 +366,54 @@ pub const VirtualMachine = struct { .offset => |offset| { if (is_cfa) { try abi.writeRegisterName(writer, arch, self.register); - try writer.print("{}", .{ fmtOffset(offset) }); + try writer.print("{d:<1}", .{offset}); } else { - try writer.print("[CFA{}]", .{ fmtOffset(offset) }); + try writer.print("[CFA{d:<1}]", .{offset}); } }, .val_offset => |offset| { if (is_cfa) { try abi.writeRegisterName(writer, arch, self.register); - try writer.print("{}", .{ fmtOffset(offset) }); + try writer.print("{d:<1}", .{offset}); } else { - try writer.print("CFA{}", .{ fmtOffset(offset) }); + try writer.print("CFA{d:<1}", .{offset}); } }, .register => |register| try abi.writeRegisterName(writer, arch, register), - .expression => try writer.writeAll("TODO(expression)"), + .expression => |expression| try writeExpression(writer, expression, arch, addr_size_bytes, endian), .val_expression => try writer.writeAll("TODO(val_expression)"), .architectural => try writer.writeAll("TODO(architectural)"), } } }; + /// Each row contains unwinding rules for a set of registers at a specific location in the program. pub const Row = struct { /// Offset from pc_begin offset: u64 = 0, + /// Special-case column that defines the CFA (Canonical Frame Address) rule. + /// The register field of this column defines the register that CFA is derived + /// from, while other columns define registers in terms of the CFA. cfa: Column = .{}, - /// Index into `columns` of the first column in this row + /// Index into `columns` of the first column in this row. columns_start: usize = undefined, columns_len: u8 = 0, }; - rows: std.ArrayListUnmanaged(Row) = .{}, columns: std.ArrayListUnmanaged(Column) = .{}, + row_stack: std.ArrayListUnmanaged(Row) = .{}, current_row: Row = .{}, + // TODO: Add stack machine stack + pub fn reset(self: *VirtualMachine) void { - self.rows.clearRetainingCapacity(); + self.row_stack.clearRetainingCapacity(); self.columns.clearRetainingCapacity(); self.current_row = .{}; } pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.rows.deinit(allocator); + self.row_stack.deinit(allocator); self.columns.deinit(allocator); self.* = undefined; } @@ -366,8 +457,20 @@ pub const VirtualMachine = struct { .undefined => {}, .same_value => {}, .register => {}, - .remember_state => {}, - .restore_state => {}, + .remember_state => { + + // TODO: The row stack only actually needs the column information + // TODO: Also it needs to copy the columns because changes can edit the referenced columns + // TODO: This function could push the column range onto the stack, the copy the columns and update current row + + try self.row_stack.append(allocator, self.current_row); + }, + .restore_state => { + if (self.row_stack.items.len == 0) return error.InvalidOperation; + const row = self.row_stack.pop(); + self.current_row.columns_len = row.columns_len; + self.current_row.columns_start = row.columns_start; + }, .def_cfa => |i| { self.current_row.cfa = .{ .register = i.operands.register, @@ -376,11 +479,14 @@ pub const VirtualMachine = struct { }, .def_cfa_register => {}, .def_cfa_offset => |i| { + self.current_row.cfa.rule = .{ .offset = @intCast(i64, i.operands.offset) }; + }, + .def_cfa_expression => |i| { + self.current_row.cfa.register = undefined; self.current_row.cfa.rule = .{ - .offset = @intCast(i64, i.operands.offset) + .expression = i.operands.block, }; }, - .def_cfa_expression => {}, .expression => {}, .offset_extended_sf => {}, .def_cfa_sf => {}, @@ -390,5 +496,4 @@ pub const VirtualMachine = struct { .val_expression => {}, } } - }; diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig new file mode 100644 index 0000000000..dd838ecf96 --- /dev/null +++ b/lib/std/dwarf/expressions.zig @@ -0,0 +1,197 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const OP = @import("OP.zig"); +const leb = @import("../leb128.zig"); + +pub const StackMachineOptions = struct { + /// The address size of the target architecture + addr_size: u8 = @sizeOf(usize), + + /// Endianess of the target architecture + endian: std.builtin.Endian = .Little, + + /// Restrict the stack machine to a subset of opcodes used in call frame instructions + call_frame_mode: bool = false, +}; + +/// A stack machine that can decode and run DWARF expressions. +/// Expressions can be decoded for non-native address size and endianness, +/// but can only be executed if the current target matches the configuration. +pub fn StackMachine(comptime options: StackMachineOptions) type { + const addr_type = switch(options.addr_size) { + 2 => u16, + 4 => u32, + 8 => u64, + else => @compileError("Unsupported address size of " ++ options.addr_size), + }; + + const addr_type_signed = switch(options.addr_size) { + 2 => i16, + 4 => i32, + 8 => i64, + else => @compileError("Unsupported address size of " ++ options.addr_size), + }; + + return struct { + const Value = union(enum) { + generic: addr_type, + const_type: []const u8, + register: u8, + base_register: struct { + base_register: u8, + offset: i64, + }, + composite_location: struct { + size: u64, + offset: i64, + }, + block: []const u8, + base_type: struct { + type_offset: u64, + value_bytes: []const u8, + }, + deref_type: struct { + size: u8, + offset: u64, + }, + }; + + stack: std.ArrayListUnmanaged(Value) = .{}, + + fn generic(value: anytype) Value { + const int_info = @typeInfo(@TypeOf(value)).Int; + if (@sizeOf(@TypeOf(value)) > options.addr_size) { + return .{ + .generic = switch (int_info.signedness) { + .signed => @bitCast(addr_type, @truncate(addr_type_signed, value)), + .unsigned => @truncate(addr_type, value), + } + }; + } else { + return .{ + .generic = switch (int_info.signedness) { + .signed => @bitCast(addr_type, @intCast(addr_type_signed, value)), + .unsigned => @intCast(addr_type, value), + } + }; + } + } + + pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8) !?Value { + const reader = stream.reader(); + return switch (opcode) { + OP.addr, + OP.call_ref, + => generic(try reader.readInt(addr_type, options.endian)), + OP.const1u, + OP.pick, + OP.deref_size, + OP.xderef_size, + => generic(try reader.readByte()), + OP.const1s => generic(try reader.readByteSigned()), + OP.const2u, + OP.call2, + OP.call4, + => generic(try reader.readInt(u16, options.endian)), + OP.const2s, + OP.bra, + OP.skip, + => generic(try reader.readInt(i16, options.endian)), + OP.const4u => generic(try reader.readInt(u32, options.endian)), + OP.const4s => generic(try reader.readInt(i32, options.endian)), + OP.const8u => generic(try reader.readInt(u64, options.endian)), + OP.const8s => generic(try reader.readInt(i64, options.endian)), + OP.constu, + OP.plus_uconst, + OP.addrx, + OP.constx, + OP.convert, + OP.reinterpret, + => generic(try leb.readULEB128(u64, reader)), + OP.consts, + OP.fbreg, + => generic(try leb.readILEB128(i64, reader)), + OP.lit0...OP.lit31 => |n| generic(n - OP.lit0), + OP.reg0...OP.reg31 => |n| .{ .register = n - OP.reg0 }, + OP.breg0...OP.breg31 => |n| .{ + .base_register = .{ + .base_register = n - OP.breg0, + .offset = try leb.readILEB128(i64, reader), + } + }, + OP.regx => .{ .register = try leb.readULEB128(u8, reader) }, + OP.bregx, + OP.regval_type => .{ + .base_register = .{ + .base_register = try leb.readULEB128(u8, reader), + .offset = try leb.readILEB128(i64, reader), + } + }, + OP.piece => .{ + .composite_location = .{ + .size = try leb.readULEB128(u8, reader), + .offset = 0, + }, + }, + OP.bit_piece => .{ + .composite_location = .{ + .size = try leb.readULEB128(u8, reader), + .offset = try leb.readILEB128(i64, reader), + }, + }, + OP.implicit_value, + OP.entry_value + => blk: { + const size = try leb.readULEB128(u8, reader); + if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; + const block = stream.buffer[stream.pos..][0..size]; + stream.pos += size; + break :blk .{ + .block = block, + }; + }, + OP.const_type => blk: { + const type_offset = try leb.readULEB128(u8, reader); + const size = try reader.readByte(); + if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; + const value_bytes = stream.buffer[stream.pos..][0..size]; + stream.pos += size; + break :blk .{ + .base_type = .{ + .type_offset = type_offset, + .value_bytes = value_bytes, + } + }; + }, + OP.deref_type, + OP.xderef_type, + => .{ + .deref_type = .{ + .size = try reader.readByte(), + .offset = try leb.readULEB128(u64, reader), + }, + }, + OP.lo_user...OP.hi_user => return error.UnimplementedUserOpcode, + else => null, + }; + } + + pub fn step( + self: *StackMachine, + stream: std.io.FixedBufferStream([]const u8), + allocator: std.mem.Allocator, + ) !void { + if (@sizeOf(usize) != addr_type or options.endian != builtin.target.cpu.arch.endian()) + @compileError("Execution of non-native address sizees / endianness is not supported"); + + const opcode = try stream.reader.readByte(); + _ = opcode; + _ = self; + _ = allocator; + + // switch (opcode) { + // OP.addr => try self.stack.append(allocator, try readOperand(stream, opcode)), + // } + } + }; +}